// SPDX-License-Identifier: GPL-2.0-only
/*
 * Driver for the Atmel Extensible DMA Controller (aka XDMAC on AT91 systems)
 *
 * Copyright (C) 2014 Atmel Corporation
 *
 * Author: Ludovic Desroches <ludovic.desroches@atmel.com>
 */

#include <asm/barrier.h>
#include <dt-bindings/dma/at91.h>
#include <linux/clk.h>
#include <linux/dmaengine.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_dma.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>

#include "dmaengine.h"

/* Global registers */
#define AT_XDMAC_GTYPE		0x00	/* Global Type Register */
#define		AT_XDMAC_NB_CH(i)	(((i) & 0x1F) + 1)		/* Number of Channels Minus One */
#define		AT_XDMAC_FIFO_SZ(i)	(((i) >> 5) & 0x7FF)		/* Number of Bytes */
#define		AT_XDMAC_NB_REQ(i)	((((i) >> 16) & 0x3F) + 1)	/* Number of Peripheral Requests Minus One */
#define AT_XDMAC_GCFG		0x04	/* Global Configuration Register */
#define		AT_XDMAC_WRHP(i)		(((i) & 0xF) << 4)
#define		AT_XDMAC_WRMP(i)		(((i) & 0xF) << 8)
#define		AT_XDMAC_WRLP(i)		(((i) & 0xF) << 12)
#define		AT_XDMAC_RDHP(i)		(((i) & 0xF) << 16)
#define		AT_XDMAC_RDMP(i)		(((i) & 0xF) << 20)
#define		AT_XDMAC_RDLP(i)		(((i) & 0xF) << 24)
#define		AT_XDMAC_RDSG(i)		(((i) & 0xF) << 28)
#define AT_XDMAC_GCFG_M2M	(AT_XDMAC_RDLP(0xF) | AT_XDMAC_WRLP(0xF))
#define AT_XDMAC_GCFG_P2M	(AT_XDMAC_RDSG(0x1) | AT_XDMAC_RDHP(0x3) | \
				AT_XDMAC_WRHP(0x5))
#define AT_XDMAC_GWAC		0x08	/* Global Weighted Arbiter Configuration Register */
#define		AT_XDMAC_PW0(i)		(((i) & 0xF) << 0)
#define		AT_XDMAC_PW1(i)		(((i) & 0xF) << 4)
#define		AT_XDMAC_PW2(i)		(((i) & 0xF) << 8)
#define		AT_XDMAC_PW3(i)		(((i) & 0xF) << 12)
#define AT_XDMAC_GWAC_M2M	0
#define AT_XDMAC_GWAC_P2M	(AT_XDMAC_PW0(0xF) | AT_XDMAC_PW2(0xF))

#define AT_XDMAC_GIE		0x0C	/* Global Interrupt Enable Register */
#define AT_XDMAC_GID		0x10	/* Global Interrupt Disable Register */
#define AT_XDMAC_GIM		0x14	/* Global Interrupt Mask Register */
#define AT_XDMAC_GIS		0x18	/* Global Interrupt Status Register */
#define AT_XDMAC_GE		0x1C	/* Global Channel Enable Register */
#define AT_XDMAC_GD		0x20	/* Global Channel Disable Register */
#define AT_XDMAC_GS		0x24	/* Global Channel Status Register */
#define AT_XDMAC_VERSION	0xFFC	/* XDMAC Version Register */

/* Channel relative registers offsets */
#define AT_XDMAC_CIE		0x00	/* Channel Interrupt Enable Register */
#define		AT_XDMAC_CIE_BIE	BIT(0)	/* End of Block Interrupt Enable Bit */
#define		AT_XDMAC_CIE_LIE	BIT(1)	/* End of Linked List Interrupt Enable Bit */
#define		AT_XDMAC_CIE_DIE	BIT(2)	/* End of Disable Interrupt Enable Bit */
#define		AT_XDMAC_CIE_FIE	BIT(3)	/* End of Flush Interrupt Enable Bit */
#define		AT_XDMAC_CIE_RBEIE	BIT(4)	/* Read Bus Error Interrupt Enable Bit */
#define		AT_XDMAC_CIE_WBEIE	BIT(5)	/* Write Bus Error Interrupt Enable Bit */
#define		AT_XDMAC_CIE_ROIE	BIT(6)	/* Request Overflow Interrupt Enable Bit */
#define AT_XDMAC_CID		0x04	/* Channel Interrupt Disable Register */
#define		AT_XDMAC_CID_BID	BIT(0)	/* End of Block Interrupt Disable Bit */
#define		AT_XDMAC_CID_LID	BIT(1)	/* End of Linked List Interrupt Disable Bit */
#define		AT_XDMAC_CID_DID	BIT(2)	/* End of Disable Interrupt Disable Bit */
#define		AT_XDMAC_CID_FID	BIT(3)	/* End of Flush Interrupt Disable Bit */
#define		AT_XDMAC_CID_RBEID	BIT(4)	/* Read Bus Error Interrupt Disable Bit */
#define		AT_XDMAC_CID_WBEID	BIT(5)	/* Write Bus Error Interrupt Disable Bit */
#define		AT_XDMAC_CID_ROID	BIT(6)	/* Request Overflow Interrupt Disable Bit */
#define AT_XDMAC_CIM		0x08	/* Channel Interrupt Mask Register */
#define		AT_XDMAC_CIM_BIM	BIT(0)	/* End of Block Interrupt Mask Bit */
#define		AT_XDMAC_CIM_LIM	BIT(1)	/* End of Linked List Interrupt Mask Bit */
#define		AT_XDMAC_CIM_DIM	BIT(2)	/* End of Disable Interrupt Mask Bit */
#define		AT_XDMAC_CIM_FIM	BIT(3)	/* End of Flush Interrupt Mask Bit */
#define		AT_XDMAC_CIM_RBEIM	BIT(4)	/* Read Bus Error Interrupt Mask Bit */
#define		AT_XDMAC_CIM_WBEIM	BIT(5)	/* Write Bus Error Interrupt Mask Bit */
#define		AT_XDMAC_CIM_ROIM	BIT(6)	/* Request Overflow Interrupt Mask Bit */
#define AT_XDMAC_CIS		0x0C	/* Channel Interrupt Status Register */
#define		AT_XDMAC_CIS_BIS	BIT(0)	/* End of Block Interrupt Status Bit */
#define		AT_XDMAC_CIS_LIS	BIT(1)	/* End of Linked List Interrupt Status Bit */
#define		AT_XDMAC_CIS_DIS	BIT(2)	/* End of Disable Interrupt Status Bit */
#define		AT_XDMAC_CIS_FIS	BIT(3)	/* End of Flush Interrupt Status Bit */
#define		AT_XDMAC_CIS_RBEIS	BIT(4)	/* Read Bus Error Interrupt Status Bit */
#define		AT_XDMAC_CIS_WBEIS	BIT(5)	/* Write Bus Error Interrupt Status Bit */
#define		AT_XDMAC_CIS_ROIS	BIT(6)	/* Request Overflow Interrupt Status Bit */
#define AT_XDMAC_CSA		0x10	/* Channel Source Address Register */
#define AT_XDMAC_CDA		0x14	/* Channel Destination Address Register */
#define AT_XDMAC_CNDA		0x18	/* Channel Next Descriptor Address Register */
#define		AT_XDMAC_CNDA_NDAIF(i)	((i) & 0x1)			/* Channel x Next Descriptor Interface */
#define		AT_XDMAC_CNDA_NDA(i)	((i) & 0xfffffffc)		/* Channel x Next Descriptor Address */
#define AT_XDMAC_CNDC		0x1C	/* Channel Next Descriptor Control Register */
#define		AT_XDMAC_CNDC_NDE		(0x1 << 0)		/* Channel x Next Descriptor Enable */
#define		AT_XDMAC_CNDC_NDSUP		(0x1 << 1)		/* Channel x Next Descriptor Source Update */
#define		AT_XDMAC_CNDC_NDDUP		(0x1 << 2)		/* Channel x Next Descriptor Destination Update */
#define		AT_XDMAC_CNDC_NDVIEW_MASK	GENMASK(28, 27)
#define		AT_XDMAC_CNDC_NDVIEW_NDV0	(0x0 << 3)		/* Channel x Next Descriptor View 0 */
#define		AT_XDMAC_CNDC_NDVIEW_NDV1	(0x1 << 3)		/* Channel x Next Descriptor View 1 */
#define		AT_XDMAC_CNDC_NDVIEW_NDV2	(0x2 << 3)		/* Channel x Next Descriptor View 2 */
#define		AT_XDMAC_CNDC_NDVIEW_NDV3	(0x3 << 3)		/* Channel x Next Descriptor View 3 */
#define AT_XDMAC_CUBC		0x20	/* Channel Microblock Control Register */
#define AT_XDMAC_CBC		0x24	/* Channel Block Control Register */
#define AT_XDMAC_CC		0x28	/* Channel Configuration Register */
#define		AT_XDMAC_CC_TYPE	(0x1 << 0)	/* Channel Transfer Type */
#define			AT_XDMAC_CC_TYPE_MEM_TRAN	(0x0 << 0)	/* Memory to Memory Transfer */
#define			AT_XDMAC_CC_TYPE_PER_TRAN	(0x1 << 0)	/* Peripheral to Memory or Memory to Peripheral Transfer */
#define		AT_XDMAC_CC_MBSIZE_MASK	(0x3 << 1)
#define			AT_XDMAC_CC_MBSIZE_SINGLE	(0x0 << 1)
#define			AT_XDMAC_CC_MBSIZE_FOUR		(0x1 << 1)
#define			AT_XDMAC_CC_MBSIZE_EIGHT	(0x2 << 1)
#define			AT_XDMAC_CC_MBSIZE_SIXTEEN	(0x3 << 1)
#define		AT_XDMAC_CC_DSYNC	(0x1 << 4)	/* Channel Synchronization */
#define			AT_XDMAC_CC_DSYNC_PER2MEM	(0x0 << 4)
#define			AT_XDMAC_CC_DSYNC_MEM2PER	(0x1 << 4)
#define		AT_XDMAC_CC_PROT	(0x1 << 5)	/* Channel Protection */
#define			AT_XDMAC_CC_PROT_SEC		(0x0 << 5)
#define			AT_XDMAC_CC_PROT_UNSEC		(0x1 << 5)
#define		AT_XDMAC_CC_SWREQ	(0x1 << 6)	/* Channel Software Request Trigger */
#define			AT_XDMAC_CC_SWREQ_HWR_CONNECTED	(0x0 << 6)
#define			AT_XDMAC_CC_SWREQ_SWR_CONNECTED	(0x1 << 6)
#define		AT_XDMAC_CC_MEMSET	(0x1 << 7)	/* Channel Fill Block of memory */
#define			AT_XDMAC_CC_MEMSET_NORMAL_MODE	(0x0 << 7)
#define			AT_XDMAC_CC_MEMSET_HW_MODE	(0x1 << 7)
#define		AT_XDMAC_CC_CSIZE(i)	((0x7 & (i)) << 8)	/* Channel Chunk Size */
#define		AT_XDMAC_CC_DWIDTH_OFFSET	11
#define		AT_XDMAC_CC_DWIDTH_MASK	(0x3 << AT_XDMAC_CC_DWIDTH_OFFSET)
#define		AT_XDMAC_CC_DWIDTH(i)	((0x3 & (i)) << AT_XDMAC_CC_DWIDTH_OFFSET)	/* Channel Data Width */
#define			AT_XDMAC_CC_DWIDTH_BYTE		0x0
#define			AT_XDMAC_CC_DWIDTH_HALFWORD	0x1
#define			AT_XDMAC_CC_DWIDTH_WORD		0x2
#define			AT_XDMAC_CC_DWIDTH_DWORD	0x3
#define		AT_XDMAC_CC_SIF(i)	((0x1 & (i)) << 13)	/* Channel Source Interface Identifier */
#define		AT_XDMAC_CC_DIF(i)	((0x1 & (i)) << 14)	/* Channel Destination Interface Identifier */
#define		AT_XDMAC_CC_SAM_MASK	(0x3 << 16)	/* Channel Source Addressing Mode */
#define			AT_XDMAC_CC_SAM_FIXED_AM	(0x0 << 16)
#define			AT_XDMAC_CC_SAM_INCREMENTED_AM	(0x1 << 16)
#define			AT_XDMAC_CC_SAM_UBS_AM		(0x2 << 16)
#define			AT_XDMAC_CC_SAM_UBS_DS_AM	(0x3 << 16)
#define		AT_XDMAC_CC_DAM_MASK	(0x3 << 18)	/* Channel Source Addressing Mode */
#define			AT_XDMAC_CC_DAM_FIXED_AM	(0x0 << 18)
#define			AT_XDMAC_CC_DAM_INCREMENTED_AM	(0x1 << 18)
#define			AT_XDMAC_CC_DAM_UBS_AM		(0x2 << 18)
#define			AT_XDMAC_CC_DAM_UBS_DS_AM	(0x3 << 18)
#define		AT_XDMAC_CC_INITD	(0x1 << 21)	/* Channel Initialization Terminated (read only) */
#define			AT_XDMAC_CC_INITD_TERMINATED	(0x0 << 21)
#define			AT_XDMAC_CC_INITD_IN_PROGRESS	(0x1 << 21)
#define		AT_XDMAC_CC_RDIP	(0x1 << 22)	/* Read in Progress (read only) */
#define			AT_XDMAC_CC_RDIP_DONE		(0x0 << 22)
#define			AT_XDMAC_CC_RDIP_IN_PROGRESS	(0x1 << 22)
#define		AT_XDMAC_CC_WRIP	(0x1 << 23)	/* Write in Progress (read only) */
#define			AT_XDMAC_CC_WRIP_DONE		(0x0 << 23)
#define			AT_XDMAC_CC_WRIP_IN_PROGRESS	(0x1 << 23)
#define		AT_XDMAC_CC_PERID(i)	((0x7f & (i)) << 24)	/* Channel Peripheral Identifier */
#define AT_XDMAC_CDS_MSP	0x2C	/* Channel Data Stride Memory Set Pattern */
#define AT_XDMAC_CSUS		0x30	/* Channel Source Microblock Stride */
#define AT_XDMAC_CDUS		0x34	/* Channel Destination Microblock Stride */

/* Microblock control members */
#define AT_XDMAC_MBR_UBC_UBLEN_MAX	0xFFFFFFUL	/* Maximum Microblock Length */
#define AT_XDMAC_MBR_UBC_NDE		(0x1 << 24)	/* Next Descriptor Enable */
#define AT_XDMAC_MBR_UBC_NSEN		(0x1 << 25)	/* Next Descriptor Source Update */
#define AT_XDMAC_MBR_UBC_NDEN		(0x1 << 26)	/* Next Descriptor Destination Update */
#define AT_XDMAC_MBR_UBC_NDV0		(0x0 << 27)	/* Next Descriptor View 0 */
#define AT_XDMAC_MBR_UBC_NDV1		(0x1 << 27)	/* Next Descriptor View 1 */
#define AT_XDMAC_MBR_UBC_NDV2		(0x2 << 27)	/* Next Descriptor View 2 */
#define AT_XDMAC_MBR_UBC_NDV3		(0x3 << 27)	/* Next Descriptor View 3 */

#define AT_XDMAC_MAX_CHAN	0x20
#define AT_XDMAC_MAX_CSIZE	16	/* 16 data */
#define AT_XDMAC_MAX_DWIDTH	8	/* 64 bits */
#define AT_XDMAC_RESIDUE_MAX_RETRIES	5

#define AT_XDMAC_DMA_BUSWIDTHS\
	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |\
	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))

enum atc_status {
	AT_XDMAC_CHAN_IS_CYCLIC = 0,
	AT_XDMAC_CHAN_IS_PAUSED,
	AT_XDMAC_CHAN_IS_PAUSED_INTERNAL,
};

struct at_xdmac_layout {
	/* Global Channel Read Suspend Register */
	u8				grs;
	/* Global Write Suspend Register */
	u8				gws;
	/* Global Channel Read Write Suspend Register */
	u8				grws;
	/* Global Channel Read Write Resume Register */
	u8				grwr;
	/* Global Channel Software Request Register */
	u8				gswr;
	/* Global channel Software Request Status Register */
	u8				gsws;
	/* Global Channel Software Flush Request Register */
	u8				gswf;
	/* Channel reg base */
	u8				chan_cc_reg_base;
	/* Source/Destination Interface must be specified or not */
	bool				sdif;
	/* AXI queue priority configuration supported */
	bool				axi_config;
};

/* ----- Channels ----- */
struct at_xdmac_chan {
	struct dma_chan			chan;
	void __iomem			*ch_regs;
	u32				mask;		/* Channel Mask */
	u32				cfg;		/* Channel Configuration Register */
	u8				perid;		/* Peripheral ID */
	u8				perif;		/* Peripheral Interface */
	u8				memif;		/* Memory Interface */
	u32				save_cc;
	u32				save_cim;
	u32				save_cnda;
	u32				save_cndc;
	u32				irq_status;
	unsigned long			status;
	struct tasklet_struct		tasklet;
	struct dma_slave_config		sconfig;

	spinlock_t			lock;

	struct list_head		xfers_list;
	struct list_head		free_descs_list;
};


/* ----- Controller ----- */
struct at_xdmac {
	struct dma_device	dma;
	void __iomem		*regs;
	struct device		*dev;
	int			irq;
	struct clk		*clk;
	u32			save_gim;
	u32			save_gs;
	struct dma_pool		*at_xdmac_desc_pool;
	const struct at_xdmac_layout	*layout;
	struct at_xdmac_chan	chan[];
};


/* ----- Descriptors ----- */

/* Linked List Descriptor */
struct at_xdmac_lld {
	u32 mbr_nda;	/* Next Descriptor Member */
	u32 mbr_ubc;	/* Microblock Control Member */
	u32 mbr_sa;	/* Source Address Member */
	u32 mbr_da;	/* Destination Address Member */
	u32 mbr_cfg;	/* Configuration Register */
	u32 mbr_bc;	/* Block Control Register */
	u32 mbr_ds;	/* Data Stride Register */
	u32 mbr_sus;	/* Source Microblock Stride Register */
	u32 mbr_dus;	/* Destination Microblock Stride Register */
};

/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
struct at_xdmac_desc {
	struct at_xdmac_lld		lld;
	enum dma_transfer_direction	direction;
	struct dma_async_tx_descriptor	tx_dma_desc;
	struct list_head		desc_node;
	/* Following members are only used by the first descriptor */
	bool				active_xfer;
	unsigned int			xfer_size;
	struct list_head		descs_list;
	struct list_head		xfer_node;
} __aligned(sizeof(u64));

static const struct at_xdmac_layout at_xdmac_sama5d4_layout = {
	.grs = 0x28,
	.gws = 0x2C,
	.grws = 0x30,
	.grwr = 0x34,
	.gswr = 0x38,
	.gsws = 0x3C,
	.gswf = 0x40,
	.chan_cc_reg_base = 0x50,
	.sdif = true,
	.axi_config = false,
};

static const struct at_xdmac_layout at_xdmac_sama7g5_layout = {
	.grs = 0x30,
	.gws = 0x38,
	.grws = 0x40,
	.grwr = 0x44,
	.gswr = 0x48,
	.gsws = 0x4C,
	.gswf = 0x50,
	.chan_cc_reg_base = 0x60,
	.sdif = false,
	.axi_config = true,
};

static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
{
	return atxdmac->regs + (atxdmac->layout->chan_cc_reg_base + chan_nb * 0x40);
}

#define at_xdmac_read(atxdmac, reg) readl_relaxed((atxdmac)->regs + (reg))
#define at_xdmac_write(atxdmac, reg, value) \
	writel_relaxed((value), (atxdmac)->regs + (reg))

#define at_xdmac_chan_read(atchan, reg) readl_relaxed((atchan)->ch_regs + (reg))
#define at_xdmac_chan_write(atchan, reg, value) writel_relaxed((value), (atchan)->ch_regs + (reg))

static inline struct at_xdmac_chan *to_at_xdmac_chan(struct dma_chan *dchan)
{
	return container_of(dchan, struct at_xdmac_chan, chan);
}

static struct device *chan2dev(struct dma_chan *chan)
{
	return &chan->dev->device;
}

static inline struct at_xdmac *to_at_xdmac(struct dma_device *ddev)
{
	return container_of(ddev, struct at_xdmac, dma);
}

static inline struct at_xdmac_desc *txd_to_at_desc(struct dma_async_tx_descriptor *txd)
{
	return container_of(txd, struct at_xdmac_desc, tx_dma_desc);
}

static inline int at_xdmac_chan_is_cyclic(struct at_xdmac_chan *atchan)
{
	return test_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
}

static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan)
{
	return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
}

static inline int at_xdmac_chan_is_paused_internal(struct at_xdmac_chan *atchan)
{
	return test_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status);
}

static inline bool at_xdmac_chan_is_peripheral_xfer(u32 cfg)
{
	return cfg & AT_XDMAC_CC_TYPE_PER_TRAN;
}

static inline u8 at_xdmac_get_dwidth(u32 cfg)
{
	return (cfg & AT_XDMAC_CC_DWIDTH_MASK) >> AT_XDMAC_CC_DWIDTH_OFFSET;
};

static unsigned int init_nr_desc_per_channel = 64;
module_param(init_nr_desc_per_channel, uint, 0644);
MODULE_PARM_DESC(init_nr_desc_per_channel,
		 "initial descriptors per channel (default: 64)");


static void at_xdmac_runtime_suspend_descriptors(struct at_xdmac_chan *atchan)
{
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	struct at_xdmac_desc	*desc, *_desc;

	list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
		if (!desc->active_xfer)
			continue;

		pm_runtime_mark_last_busy(atxdmac->dev);
		pm_runtime_put_autosuspend(atxdmac->dev);
	}
}

static int at_xdmac_runtime_resume_descriptors(struct at_xdmac_chan *atchan)
{
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	struct at_xdmac_desc	*desc, *_desc;
	int			ret;

	list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
		if (!desc->active_xfer)
			continue;

		ret = pm_runtime_resume_and_get(atxdmac->dev);
		if (ret < 0)
			return ret;
	}

	return 0;
}

static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan)
{
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	int			ret;

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return false;

	ret = !!(at_xdmac_chan_read(atchan, AT_XDMAC_GS) & atchan->mask);

	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);

	return ret;
}

static void at_xdmac_off(struct at_xdmac *atxdmac, bool suspend_descriptors)
{
	struct dma_chan		*chan, *_chan;
	struct at_xdmac_chan	*atchan;
	int			ret;

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return;

	at_xdmac_write(atxdmac, AT_XDMAC_GD, -1L);

	/* Wait that all chans are disabled. */
	while (at_xdmac_read(atxdmac, AT_XDMAC_GS))
		cpu_relax();

	at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L);

	/* Decrement runtime PM ref counter for each active descriptor. */
	if (!list_empty(&atxdmac->dma.channels) && suspend_descriptors) {
		list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels,
					 device_node) {
			atchan = to_at_xdmac_chan(chan);
			at_xdmac_runtime_suspend_descriptors(atchan);
		}
	}

	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);
}

/* Call with lock hold. */
static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
				struct at_xdmac_desc *first)
{
	struct at_xdmac	*atxdmac = to_at_xdmac(atchan->chan.device);
	u32		reg;
	int		ret;

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return;

	dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);

	/* Set transfer as active to not try to start it again. */
	first->active_xfer = true;

	/* Tell xdmac where to get the first descriptor. */
	reg = AT_XDMAC_CNDA_NDA(first->tx_dma_desc.phys);
	if (atxdmac->layout->sdif)
		reg |= AT_XDMAC_CNDA_NDAIF(atchan->memif);

	at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, reg);

	/*
	 * When doing non cyclic transfer we need to use the next
	 * descriptor view 2 since some fields of the configuration register
	 * depend on transfer size and src/dest addresses.
	 */
	if (at_xdmac_chan_is_cyclic(atchan))
		reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
	else if ((first->lld.mbr_ubc &
		  AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3)
		reg = AT_XDMAC_CNDC_NDVIEW_NDV3;
	else
		reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
	/*
	 * Even if the register will be updated from the configuration in the
	 * descriptor when using view 2 or higher, the PROT bit won't be set
	 * properly. This bit can be modified only by using the channel
	 * configuration register.
	 */
	at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg);

	reg |= AT_XDMAC_CNDC_NDDUP
	       | AT_XDMAC_CNDC_NDSUP
	       | AT_XDMAC_CNDC_NDE;
	at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, reg);

	dev_vdbg(chan2dev(&atchan->chan),
		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));

	at_xdmac_chan_write(atchan, AT_XDMAC_CID, 0xffffffff);
	reg = AT_XDMAC_CIE_RBEIE | AT_XDMAC_CIE_WBEIE;
	/*
	 * Request Overflow Error is only for peripheral synchronized transfers
	 */
	if (at_xdmac_chan_is_peripheral_xfer(first->lld.mbr_cfg))
		reg |= AT_XDMAC_CIE_ROIE;

	/*
	 * There is no end of list when doing cyclic dma, we need to get
	 * an interrupt after each periods.
	 */
	if (at_xdmac_chan_is_cyclic(atchan))
		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
				    reg | AT_XDMAC_CIE_BIE);
	else
		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
				    reg | AT_XDMAC_CIE_LIE);
	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atchan->mask);
	dev_vdbg(chan2dev(&atchan->chan),
		 "%s: enable channel (0x%08x)\n", __func__, atchan->mask);
	wmb();
	at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);

	dev_vdbg(chan2dev(&atchan->chan),
		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
}

static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct at_xdmac_desc	*desc = txd_to_at_desc(tx);
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(tx->chan);
	dma_cookie_t		cookie;
	unsigned long		irqflags;

	spin_lock_irqsave(&atchan->lock, irqflags);
	cookie = dma_cookie_assign(tx);

	list_add_tail(&desc->xfer_node, &atchan->xfers_list);
	spin_unlock_irqrestore(&atchan->lock, irqflags);

	dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
		 __func__, atchan, desc);

	return cookie;
}

static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan,
						 gfp_t gfp_flags)
{
	struct at_xdmac_desc	*desc;
	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
	dma_addr_t		phys;

	desc = dma_pool_zalloc(atxdmac->at_xdmac_desc_pool, gfp_flags, &phys);
	if (desc) {
		INIT_LIST_HEAD(&desc->descs_list);
		dma_async_tx_descriptor_init(&desc->tx_dma_desc, chan);
		desc->tx_dma_desc.tx_submit = at_xdmac_tx_submit;
		desc->tx_dma_desc.phys = phys;
	}

	return desc;
}

static void at_xdmac_init_used_desc(struct at_xdmac_desc *desc)
{
	memset(&desc->lld, 0, sizeof(desc->lld));
	INIT_LIST_HEAD(&desc->descs_list);
	desc->direction = DMA_TRANS_NONE;
	desc->xfer_size = 0;
	desc->active_xfer = false;
}

/* Call must be protected by lock. */
static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan)
{
	struct at_xdmac_desc *desc;

	if (list_empty(&atchan->free_descs_list)) {
		desc = at_xdmac_alloc_desc(&atchan->chan, GFP_NOWAIT);
	} else {
		desc = list_first_entry(&atchan->free_descs_list,
					struct at_xdmac_desc, desc_node);
		list_del(&desc->desc_node);
		at_xdmac_init_used_desc(desc);
	}

	return desc;
}

static void at_xdmac_queue_desc(struct dma_chan *chan,
				struct at_xdmac_desc *prev,
				struct at_xdmac_desc *desc)
{
	if (!prev || !desc)
		return;

	prev->lld.mbr_nda = desc->tx_dma_desc.phys;
	prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE;

	dev_dbg(chan2dev(chan),	"%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
		__func__, prev, &prev->lld.mbr_nda);
}

static inline void at_xdmac_increment_block_count(struct dma_chan *chan,
						  struct at_xdmac_desc *desc)
{
	if (!desc)
		return;

	desc->lld.mbr_bc++;

	dev_dbg(chan2dev(chan),
		"%s: incrementing the block count of the desc 0x%p\n",
		__func__, desc);
}

static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
				       struct of_dma *of_dma)
{
	struct at_xdmac		*atxdmac = of_dma->of_dma_data;
	struct at_xdmac_chan	*atchan;
	struct dma_chan		*chan;
	struct device		*dev = atxdmac->dma.dev;

	if (dma_spec->args_count != 1) {
		dev_err(dev, "dma phandler args: bad number of args\n");
		return NULL;
	}

	chan = dma_get_any_slave_channel(&atxdmac->dma);
	if (!chan) {
		dev_err(dev, "can't get a dma channel\n");
		return NULL;
	}

	atchan = to_at_xdmac_chan(chan);
	atchan->memif = AT91_XDMAC_DT_GET_MEM_IF(dma_spec->args[0]);
	atchan->perif = AT91_XDMAC_DT_GET_PER_IF(dma_spec->args[0]);
	atchan->perid = AT91_XDMAC_DT_GET_PERID(dma_spec->args[0]);
	dev_dbg(dev, "chan dt cfg: memif=%u perif=%u perid=%u\n",
		 atchan->memif, atchan->perif, atchan->perid);

	return chan;
}

static int at_xdmac_compute_chan_conf(struct dma_chan *chan,
				      enum dma_transfer_direction direction)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	int			csize, dwidth;

	if (direction == DMA_DEV_TO_MEM) {
		atchan->cfg =
			AT91_XDMAC_DT_PERID(atchan->perid)
			| AT_XDMAC_CC_DAM_INCREMENTED_AM
			| AT_XDMAC_CC_SAM_FIXED_AM
			| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
			| AT_XDMAC_CC_DSYNC_PER2MEM
			| AT_XDMAC_CC_MBSIZE_SIXTEEN
			| AT_XDMAC_CC_TYPE_PER_TRAN;
		if (atxdmac->layout->sdif)
			atchan->cfg |= AT_XDMAC_CC_DIF(atchan->memif) |
				       AT_XDMAC_CC_SIF(atchan->perif);

		csize = ffs(atchan->sconfig.src_maxburst) - 1;
		if (csize < 0) {
			dev_err(chan2dev(chan), "invalid src maxburst value\n");
			return -EINVAL;
		}
		atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
		dwidth = ffs(atchan->sconfig.src_addr_width) - 1;
		if (dwidth < 0) {
			dev_err(chan2dev(chan), "invalid src addr width value\n");
			return -EINVAL;
		}
		atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
	} else if (direction == DMA_MEM_TO_DEV) {
		atchan->cfg =
			AT91_XDMAC_DT_PERID(atchan->perid)
			| AT_XDMAC_CC_DAM_FIXED_AM
			| AT_XDMAC_CC_SAM_INCREMENTED_AM
			| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
			| AT_XDMAC_CC_DSYNC_MEM2PER
			| AT_XDMAC_CC_MBSIZE_SIXTEEN
			| AT_XDMAC_CC_TYPE_PER_TRAN;
		if (atxdmac->layout->sdif)
			atchan->cfg |= AT_XDMAC_CC_DIF(atchan->perif) |
				       AT_XDMAC_CC_SIF(atchan->memif);

		csize = ffs(atchan->sconfig.dst_maxburst) - 1;
		if (csize < 0) {
			dev_err(chan2dev(chan), "invalid src maxburst value\n");
			return -EINVAL;
		}
		atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
		dwidth = ffs(atchan->sconfig.dst_addr_width) - 1;
		if (dwidth < 0) {
			dev_err(chan2dev(chan), "invalid dst addr width value\n");
			return -EINVAL;
		}
		atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
	}

	dev_dbg(chan2dev(chan),	"%s: cfg=0x%08x\n", __func__, atchan->cfg);

	return 0;
}

/*
 * Only check that maxburst and addr width values are supported by
 * the controller but not that the configuration is good to perform the
 * transfer since we don't know the direction at this stage.
 */
static int at_xdmac_check_slave_config(struct dma_slave_config *sconfig)
{
	if ((sconfig->src_maxburst > AT_XDMAC_MAX_CSIZE)
	    || (sconfig->dst_maxburst > AT_XDMAC_MAX_CSIZE))
		return -EINVAL;

	if ((sconfig->src_addr_width > AT_XDMAC_MAX_DWIDTH)
	    || (sconfig->dst_addr_width > AT_XDMAC_MAX_DWIDTH))
		return -EINVAL;

	return 0;
}

static int at_xdmac_set_slave_config(struct dma_chan *chan,
				      struct dma_slave_config *sconfig)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);

	if (at_xdmac_check_slave_config(sconfig)) {
		dev_err(chan2dev(chan), "invalid slave configuration\n");
		return -EINVAL;
	}

	memcpy(&atchan->sconfig, sconfig, sizeof(atchan->sconfig));

	return 0;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
		       unsigned int sg_len, enum dma_transfer_direction direction,
		       unsigned long flags, void *context)
{
	struct at_xdmac_chan		*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac_desc		*first = NULL, *prev = NULL;
	struct scatterlist		*sg;
	int				i;
	unsigned int			xfer_size = 0;
	unsigned long			irqflags;
	struct dma_async_tx_descriptor	*ret = NULL;

	if (!sgl)
		return NULL;

	if (!is_slave_direction(direction)) {
		dev_err(chan2dev(chan), "invalid DMA direction\n");
		return NULL;
	}

	dev_dbg(chan2dev(chan), "%s: sg_len=%d, dir=%s, flags=0x%lx\n",
		 __func__, sg_len,
		 direction == DMA_MEM_TO_DEV ? "to device" : "from device",
		 flags);

	/* Protect dma_sconfig field that can be modified by set_slave_conf. */
	spin_lock_irqsave(&atchan->lock, irqflags);

	if (at_xdmac_compute_chan_conf(chan, direction))
		goto spin_unlock;

	/* Prepare descriptors. */
	for_each_sg(sgl, sg, sg_len, i) {
		struct at_xdmac_desc	*desc = NULL;
		u32			len, mem, dwidth, fixed_dwidth;

		len = sg_dma_len(sg);
		mem = sg_dma_address(sg);
		if (unlikely(!len)) {
			dev_err(chan2dev(chan), "sg data length is zero\n");
			goto spin_unlock;
		}
		dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
			 __func__, i, len, mem);

		desc = at_xdmac_get_desc(atchan);
		if (!desc) {
			dev_err(chan2dev(chan), "can't get descriptor\n");
			if (first)
				list_splice_tail_init(&first->descs_list,
						      &atchan->free_descs_list);
			goto spin_unlock;
		}

		/* Linked list descriptor setup. */
		if (direction == DMA_DEV_TO_MEM) {
			desc->lld.mbr_sa = atchan->sconfig.src_addr;
			desc->lld.mbr_da = mem;
		} else {
			desc->lld.mbr_sa = mem;
			desc->lld.mbr_da = atchan->sconfig.dst_addr;
		}
		dwidth = at_xdmac_get_dwidth(atchan->cfg);
		fixed_dwidth = IS_ALIGNED(len, 1 << dwidth)
			       ? dwidth
			       : AT_XDMAC_CC_DWIDTH_BYTE;
		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2			/* next descriptor view */
			| AT_XDMAC_MBR_UBC_NDEN					/* next descriptor dst parameter update */
			| AT_XDMAC_MBR_UBC_NSEN					/* next descriptor src parameter update */
			| (len >> fixed_dwidth);				/* microblock length */
		desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) |
				    AT_XDMAC_CC_DWIDTH(fixed_dwidth);
		dev_dbg(chan2dev(chan),
			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);

		/* Chain lld. */
		if (prev)
			at_xdmac_queue_desc(chan, prev, desc);

		prev = desc;
		if (!first)
			first = desc;

		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
			 __func__, desc, first);
		list_add_tail(&desc->desc_node, &first->descs_list);
		xfer_size += len;
	}


	first->tx_dma_desc.flags = flags;
	first->xfer_size = xfer_size;
	first->direction = direction;
	ret = &first->tx_dma_desc;

spin_unlock:
	spin_unlock_irqrestore(&atchan->lock, irqflags);
	return ret;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
			 size_t buf_len, size_t period_len,
			 enum dma_transfer_direction direction,
			 unsigned long flags)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac_desc	*first = NULL, *prev = NULL;
	unsigned int		periods = buf_len / period_len;
	int			i;
	unsigned long		irqflags;

	dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
		__func__, &buf_addr, buf_len, period_len,
		direction == DMA_MEM_TO_DEV ? "mem2per" : "per2mem", flags);

	if (!is_slave_direction(direction)) {
		dev_err(chan2dev(chan), "invalid DMA direction\n");
		return NULL;
	}

	if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) {
		dev_err(chan2dev(chan), "channel currently used\n");
		return NULL;
	}

	if (at_xdmac_compute_chan_conf(chan, direction))
		return NULL;

	for (i = 0; i < periods; i++) {
		struct at_xdmac_desc	*desc = NULL;

		spin_lock_irqsave(&atchan->lock, irqflags);
		desc = at_xdmac_get_desc(atchan);
		if (!desc) {
			dev_err(chan2dev(chan), "can't get descriptor\n");
			if (first)
				list_splice_tail_init(&first->descs_list,
						      &atchan->free_descs_list);
			spin_unlock_irqrestore(&atchan->lock, irqflags);
			return NULL;
		}
		spin_unlock_irqrestore(&atchan->lock, irqflags);
		dev_dbg(chan2dev(chan),
			"%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
			__func__, desc, &desc->tx_dma_desc.phys);

		if (direction == DMA_DEV_TO_MEM) {
			desc->lld.mbr_sa = atchan->sconfig.src_addr;
			desc->lld.mbr_da = buf_addr + i * period_len;
		} else {
			desc->lld.mbr_sa = buf_addr + i * period_len;
			desc->lld.mbr_da = atchan->sconfig.dst_addr;
		}
		desc->lld.mbr_cfg = atchan->cfg;
		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
			| AT_XDMAC_MBR_UBC_NDEN
			| AT_XDMAC_MBR_UBC_NSEN
			| period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg);

		dev_dbg(chan2dev(chan),
			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);

		/* Chain lld. */
		if (prev)
			at_xdmac_queue_desc(chan, prev, desc);

		prev = desc;
		if (!first)
			first = desc;

		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
			 __func__, desc, first);
		list_add_tail(&desc->desc_node, &first->descs_list);
	}

	at_xdmac_queue_desc(chan, prev, first);
	first->tx_dma_desc.flags = flags;
	first->xfer_size = buf_len;
	first->direction = direction;

	return &first->tx_dma_desc;
}

static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr)
{
	u32 width;

	/*
	 * Check address alignment to select the greater data width we
	 * can use.
	 *
	 * Some XDMAC implementations don't provide dword transfer, in
	 * this case selecting dword has the same behavior as
	 * selecting word transfers.
	 */
	if (!(addr & 7)) {
		width = AT_XDMAC_CC_DWIDTH_DWORD;
		dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
	} else if (!(addr & 3)) {
		width = AT_XDMAC_CC_DWIDTH_WORD;
		dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
	} else if (!(addr & 1)) {
		width = AT_XDMAC_CC_DWIDTH_HALFWORD;
		dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
	} else {
		width = AT_XDMAC_CC_DWIDTH_BYTE;
		dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
	}

	return width;
}

static struct at_xdmac_desc *
at_xdmac_interleaved_queue_desc(struct dma_chan *chan,
				struct at_xdmac_chan *atchan,
				struct at_xdmac_desc *prev,
				dma_addr_t src, dma_addr_t dst,
				struct dma_interleaved_template *xt,
				struct data_chunk *chunk)
{
	struct at_xdmac_desc	*desc;
	u32			dwidth;
	unsigned long		flags;
	size_t			ublen;
	/*
	 * WARNING: The channel configuration is set here since there is no
	 * dmaengine_slave_config call in this case. Moreover we don't know the
	 * direction, it involves we can't dynamically set the source and dest
	 * interface so we have to use the same one. Only interface 0 allows EBI
	 * access. Hopefully we can access DDR through both ports (at least on
	 * SAMA5D4x), so we can use the same interface for source and dest,
	 * that solves the fact we don't know the direction.
	 * ERRATA: Even if useless for memory transfers, the PERID has to not
	 * match the one of another channel. If not, it could lead to spurious
	 * flag status.
	 * For SAMA7G5x case, the SIF and DIF fields are no longer used.
	 * Thus, no need to have the SIF/DIF interfaces here.
	 * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as
	 * zero.
	 */
	u32			chan_cc = AT_XDMAC_CC_PERID(0x7f)
					| AT_XDMAC_CC_MBSIZE_SIXTEEN
					| AT_XDMAC_CC_TYPE_MEM_TRAN;

	dwidth = at_xdmac_align_width(chan, src | dst | chunk->size);
	if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
		dev_dbg(chan2dev(chan),
			"%s: chunk too big (%zu, max size %lu)...\n",
			__func__, chunk->size,
			AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth);
		return NULL;
	}

	if (prev)
		dev_dbg(chan2dev(chan),
			"Adding items at the end of desc 0x%p\n", prev);

	if (xt->src_inc) {
		if (xt->src_sgl)
			chan_cc |=  AT_XDMAC_CC_SAM_UBS_AM;
		else
			chan_cc |=  AT_XDMAC_CC_SAM_INCREMENTED_AM;
	}

	if (xt->dst_inc) {
		if (xt->dst_sgl)
			chan_cc |=  AT_XDMAC_CC_DAM_UBS_AM;
		else
			chan_cc |=  AT_XDMAC_CC_DAM_INCREMENTED_AM;
	}

	spin_lock_irqsave(&atchan->lock, flags);
	desc = at_xdmac_get_desc(atchan);
	spin_unlock_irqrestore(&atchan->lock, flags);
	if (!desc) {
		dev_err(chan2dev(chan), "can't get descriptor\n");
		return NULL;
	}

	chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);

	ublen = chunk->size >> dwidth;

	desc->lld.mbr_sa = src;
	desc->lld.mbr_da = dst;
	desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk);
	desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk);

	desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
		| AT_XDMAC_MBR_UBC_NDEN
		| AT_XDMAC_MBR_UBC_NSEN
		| ublen;
	desc->lld.mbr_cfg = chan_cc;

	dev_dbg(chan2dev(chan),
		"%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
		__func__, &desc->lld.mbr_sa, &desc->lld.mbr_da,
		desc->lld.mbr_ubc, desc->lld.mbr_cfg);

	/* Chain lld. */
	if (prev)
		at_xdmac_queue_desc(chan, prev, desc);

	return desc;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_interleaved(struct dma_chan *chan,
			  struct dma_interleaved_template *xt,
			  unsigned long flags)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac_desc	*prev = NULL, *first = NULL;
	dma_addr_t		dst_addr, src_addr;
	size_t			src_skip = 0, dst_skip = 0, len = 0;
	struct data_chunk	*chunk;
	int			i;

	if (!xt || !xt->numf || (xt->dir != DMA_MEM_TO_MEM))
		return NULL;

	/*
	 * TODO: Handle the case where we have to repeat a chain of
	 * descriptors...
	 */
	if ((xt->numf > 1) && (xt->frame_size > 1))
		return NULL;

	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n",
		__func__, &xt->src_start, &xt->dst_start,	xt->numf,
		xt->frame_size, flags);

	src_addr = xt->src_start;
	dst_addr = xt->dst_start;

	if (xt->numf > 1) {
		first = at_xdmac_interleaved_queue_desc(chan, atchan,
							NULL,
							src_addr, dst_addr,
							xt, xt->sgl);
		if (!first)
			return NULL;

		/* Length of the block is (BLEN+1) microblocks. */
		for (i = 0; i < xt->numf - 1; i++)
			at_xdmac_increment_block_count(chan, first);

		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
			__func__, first, first);
		list_add_tail(&first->desc_node, &first->descs_list);
	} else {
		for (i = 0; i < xt->frame_size; i++) {
			size_t src_icg = 0, dst_icg = 0;
			struct at_xdmac_desc *desc;

			chunk = xt->sgl + i;

			dst_icg = dmaengine_get_dst_icg(xt, chunk);
			src_icg = dmaengine_get_src_icg(xt, chunk);

			src_skip = chunk->size + src_icg;
			dst_skip = chunk->size + dst_icg;

			dev_dbg(chan2dev(chan),
				"%s: chunk size=%zu, src icg=%zu, dst icg=%zu\n",
				__func__, chunk->size, src_icg, dst_icg);

			desc = at_xdmac_interleaved_queue_desc(chan, atchan,
							       prev,
							       src_addr, dst_addr,
							       xt, chunk);
			if (!desc) {
				if (first)
					list_splice_tail_init(&first->descs_list,
							      &atchan->free_descs_list);
				return NULL;
			}

			if (!first)
				first = desc;

			dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
				__func__, desc, first);
			list_add_tail(&desc->desc_node, &first->descs_list);

			if (xt->src_sgl)
				src_addr += src_skip;

			if (xt->dst_sgl)
				dst_addr += dst_skip;

			len += chunk->size;
			prev = desc;
		}
	}

	first->tx_dma_desc.cookie = -EBUSY;
	first->tx_dma_desc.flags = flags;
	first->xfer_size = len;

	return &first->tx_dma_desc;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
			 size_t len, unsigned long flags)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac_desc	*first = NULL, *prev = NULL;
	size_t			remaining_size = len, xfer_size = 0, ublen;
	dma_addr_t		src_addr = src, dst_addr = dest;
	u32			dwidth;
	/*
	 * WARNING: We don't know the direction, it involves we can't
	 * dynamically set the source and dest interface so we have to use the
	 * same one. Only interface 0 allows EBI access. Hopefully we can
	 * access DDR through both ports (at least on SAMA5D4x), so we can use
	 * the same interface for source and dest, that solves the fact we
	 * don't know the direction.
	 * ERRATA: Even if useless for memory transfers, the PERID has to not
	 * match the one of another channel. If not, it could lead to spurious
	 * flag status.
	 * For SAMA7G5x case, the SIF and DIF fields are no longer used.
	 * Thus, no need to have the SIF/DIF interfaces here.
	 * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as
	 * zero.
	 */
	u32			chan_cc = AT_XDMAC_CC_PERID(0x7f)
					| AT_XDMAC_CC_DAM_INCREMENTED_AM
					| AT_XDMAC_CC_SAM_INCREMENTED_AM
					| AT_XDMAC_CC_MBSIZE_SIXTEEN
					| AT_XDMAC_CC_TYPE_MEM_TRAN;
	unsigned long		irqflags;

	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
		__func__, &src, &dest, len, flags);

	if (unlikely(!len))
		return NULL;

	dwidth = at_xdmac_align_width(chan, src_addr | dst_addr);

	/* Prepare descriptors. */
	while (remaining_size) {
		struct at_xdmac_desc	*desc = NULL;

		dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);

		spin_lock_irqsave(&atchan->lock, irqflags);
		desc = at_xdmac_get_desc(atchan);
		spin_unlock_irqrestore(&atchan->lock, irqflags);
		if (!desc) {
			dev_err(chan2dev(chan), "can't get descriptor\n");
			if (first)
				list_splice_tail_init(&first->descs_list,
						      &atchan->free_descs_list);
			return NULL;
		}

		/* Update src and dest addresses. */
		src_addr += xfer_size;
		dst_addr += xfer_size;

		if (remaining_size >= AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)
			xfer_size = AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth;
		else
			xfer_size = remaining_size;

		dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size);

		/* Check remaining length and change data width if needed. */
		dwidth = at_xdmac_align_width(chan,
					      src_addr | dst_addr | xfer_size);
		chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK;
		chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);

		ublen = xfer_size >> dwidth;
		remaining_size -= xfer_size;

		desc->lld.mbr_sa = src_addr;
		desc->lld.mbr_da = dst_addr;
		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2
			| AT_XDMAC_MBR_UBC_NDEN
			| AT_XDMAC_MBR_UBC_NSEN
			| ublen;
		desc->lld.mbr_cfg = chan_cc;

		dev_dbg(chan2dev(chan),
			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg);

		/* Chain lld. */
		if (prev)
			at_xdmac_queue_desc(chan, prev, desc);

		prev = desc;
		if (!first)
			first = desc;

		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
			 __func__, desc, first);
		list_add_tail(&desc->desc_node, &first->descs_list);
	}

	first->tx_dma_desc.flags = flags;
	first->xfer_size = len;

	return &first->tx_dma_desc;
}

static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan,
							 struct at_xdmac_chan *atchan,
							 dma_addr_t dst_addr,
							 size_t len,
							 int value)
{
	struct at_xdmac_desc	*desc;
	unsigned long		flags;
	size_t			ublen;
	u32			dwidth;
	char			pattern;
	/*
	 * WARNING: The channel configuration is set here since there is no
	 * dmaengine_slave_config call in this case. Moreover we don't know the
	 * direction, it involves we can't dynamically set the source and dest
	 * interface so we have to use the same one. Only interface 0 allows EBI
	 * access. Hopefully we can access DDR through both ports (at least on
	 * SAMA5D4x), so we can use the same interface for source and dest,
	 * that solves the fact we don't know the direction.
	 * ERRATA: Even if useless for memory transfers, the PERID has to not
	 * match the one of another channel. If not, it could lead to spurious
	 * flag status.
	 * For SAMA7G5x case, the SIF and DIF fields are no longer used.
	 * Thus, no need to have the SIF/DIF interfaces here.
	 * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as
	 * zero.
	 */
	u32			chan_cc = AT_XDMAC_CC_PERID(0x7f)
					| AT_XDMAC_CC_DAM_UBS_AM
					| AT_XDMAC_CC_SAM_INCREMENTED_AM
					| AT_XDMAC_CC_MBSIZE_SIXTEEN
					| AT_XDMAC_CC_MEMSET_HW_MODE
					| AT_XDMAC_CC_TYPE_MEM_TRAN;

	dwidth = at_xdmac_align_width(chan, dst_addr);

	if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
		dev_err(chan2dev(chan),
			"%s: Transfer too large, aborting...\n",
			__func__);
		return NULL;
	}

	spin_lock_irqsave(&atchan->lock, flags);
	desc = at_xdmac_get_desc(atchan);
	spin_unlock_irqrestore(&atchan->lock, flags);
	if (!desc) {
		dev_err(chan2dev(chan), "can't get descriptor\n");
		return NULL;
	}

	chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);

	/* Only the first byte of value is to be used according to dmaengine */
	pattern = (char)value;

	ublen = len >> dwidth;

	desc->lld.mbr_da = dst_addr;
	desc->lld.mbr_ds = (pattern << 24) |
			   (pattern << 16) |
			   (pattern << 8) |
			   pattern;
	desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
		| AT_XDMAC_MBR_UBC_NDEN
		| AT_XDMAC_MBR_UBC_NSEN
		| ublen;
	desc->lld.mbr_cfg = chan_cc;

	dev_dbg(chan2dev(chan),
		"%s: lld: mbr_da=%pad, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
		__func__, &desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc,
		desc->lld.mbr_cfg);

	return desc;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
			 size_t len, unsigned long flags)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac_desc	*desc;

	dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%zu, pattern=0x%x, flags=0x%lx\n",
		__func__, &dest, len, value, flags);

	if (unlikely(!len))
		return NULL;

	desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value);
	list_add_tail(&desc->desc_node, &desc->descs_list);

	desc->tx_dma_desc.cookie = -EBUSY;
	desc->tx_dma_desc.flags = flags;
	desc->xfer_size = len;

	return &desc->tx_dma_desc;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
			    unsigned int sg_len, int value,
			    unsigned long flags)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac_desc	*desc, *pdesc = NULL,
				*ppdesc = NULL, *first = NULL;
	struct scatterlist	*sg, *psg = NULL, *ppsg = NULL;
	size_t			stride = 0, pstride = 0, len = 0;
	int			i;

	if (!sgl)
		return NULL;

	dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n",
		__func__, sg_len, value, flags);

	/* Prepare descriptors. */
	for_each_sg(sgl, sg, sg_len, i) {
		dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
			__func__, &sg_dma_address(sg), sg_dma_len(sg),
			value, flags);
		desc = at_xdmac_memset_create_desc(chan, atchan,
						   sg_dma_address(sg),
						   sg_dma_len(sg),
						   value);
		if (!desc && first)
			list_splice_tail_init(&first->descs_list,
					      &atchan->free_descs_list);

		if (!first)
			first = desc;

		/* Update our strides */
		pstride = stride;
		if (psg)
			stride = sg_dma_address(sg) -
				(sg_dma_address(psg) + sg_dma_len(psg));

		/*
		 * The scatterlist API gives us only the address and
		 * length of each elements.
		 *
		 * Unfortunately, we don't have the stride, which we
		 * will need to compute.
		 *
		 * That make us end up in a situation like this one:
		 *    len    stride    len    stride    len
		 * +-------+        +-------+        +-------+
		 * |  N-2  |        |  N-1  |        |   N   |
		 * +-------+        +-------+        +-------+
		 *
		 * We need all these three elements (N-2, N-1 and N)
		 * to actually take the decision on whether we need to
		 * queue N-1 or reuse N-2.
		 *
		 * We will only consider N if it is the last element.
		 */
		if (ppdesc && pdesc) {
			if ((stride == pstride) &&
			    (sg_dma_len(ppsg) == sg_dma_len(psg))) {
				dev_dbg(chan2dev(chan),
					"%s: desc 0x%p can be merged with desc 0x%p\n",
					__func__, pdesc, ppdesc);

				/*
				 * Increment the block count of the
				 * N-2 descriptor
				 */
				at_xdmac_increment_block_count(chan, ppdesc);
				ppdesc->lld.mbr_dus = stride;

				/*
				 * Put back the N-1 descriptor in the
				 * free descriptor list
				 */
				list_add_tail(&pdesc->desc_node,
					      &atchan->free_descs_list);

				/*
				 * Make our N-1 descriptor pointer
				 * point to the N-2 since they were
				 * actually merged.
				 */
				pdesc = ppdesc;

			/*
			 * Rule out the case where we don't have
			 * pstride computed yet (our second sg
			 * element)
			 *
			 * We also want to catch the case where there
			 * would be a negative stride,
			 */
			} else if (pstride ||
				   sg_dma_address(sg) < sg_dma_address(psg)) {
				/*
				 * Queue the N-1 descriptor after the
				 * N-2
				 */
				at_xdmac_queue_desc(chan, ppdesc, pdesc);

				/*
				 * Add the N-1 descriptor to the list
				 * of the descriptors used for this
				 * transfer
				 */
				list_add_tail(&desc->desc_node,
					      &first->descs_list);
				dev_dbg(chan2dev(chan),
					"%s: add desc 0x%p to descs_list 0x%p\n",
					__func__, desc, first);
			}
		}

		/*
		 * If we are the last element, just see if we have the
		 * same size than the previous element.
		 *
		 * If so, we can merge it with the previous descriptor
		 * since we don't care about the stride anymore.
		 */
		if ((i == (sg_len - 1)) &&
		    sg_dma_len(psg) == sg_dma_len(sg)) {
			dev_dbg(chan2dev(chan),
				"%s: desc 0x%p can be merged with desc 0x%p\n",
				__func__, desc, pdesc);

			/*
			 * Increment the block count of the N-1
			 * descriptor
			 */
			at_xdmac_increment_block_count(chan, pdesc);
			pdesc->lld.mbr_dus = stride;

			/*
			 * Put back the N descriptor in the free
			 * descriptor list
			 */
			list_add_tail(&desc->desc_node,
				      &atchan->free_descs_list);
		}

		/* Update our descriptors */
		ppdesc = pdesc;
		pdesc = desc;

		/* Update our scatter pointers */
		ppsg = psg;
		psg = sg;

		len += sg_dma_len(sg);
	}

	first->tx_dma_desc.cookie = -EBUSY;
	first->tx_dma_desc.flags = flags;
	first->xfer_size = len;

	return &first->tx_dma_desc;
}

static enum dma_status
at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
		   struct dma_tx_state *txstate)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	struct at_xdmac_desc	*desc, *_desc, *iter;
	struct list_head	*descs_list;
	enum dma_status		ret;
	int			residue, retry, pm_status;
	u32			cur_nda, check_nda, cur_ubc, mask, value;
	u8			dwidth = 0;
	unsigned long		flags;
	bool			initd;

	ret = dma_cookie_status(chan, cookie, txstate);
	if (ret == DMA_COMPLETE || !txstate)
		return ret;

	pm_status = pm_runtime_resume_and_get(atxdmac->dev);
	if (pm_status < 0)
		return DMA_ERROR;

	spin_lock_irqsave(&atchan->lock, flags);

	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);

	/*
	 * If the transfer has not been started yet, don't need to compute the
	 * residue, it's the transfer length.
	 */
	if (!desc->active_xfer) {
		dma_set_residue(txstate, desc->xfer_size);
		goto spin_unlock;
	}

	residue = desc->xfer_size;
	/*
	 * Flush FIFO: only relevant when the transfer is source peripheral
	 * synchronized. Flush is needed before reading CUBC because data in
	 * the FIFO are not reported by CUBC. Reporting a residue of the
	 * transfer length while we have data in FIFO can cause issue.
	 * Usecase: atmel USART has a timeout which means I have received
	 * characters but there is no more character received for a while. On
	 * timeout, it requests the residue. If the data are in the DMA FIFO,
	 * we will return a residue of the transfer length. It means no data
	 * received. If an application is waiting for these data, it will hang
	 * since we won't have another USART timeout without receiving new
	 * data.
	 */
	mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
	value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
	if ((desc->lld.mbr_cfg & mask) == value) {
		at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask);
		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
			cpu_relax();
	}

	/*
	 * The easiest way to compute the residue should be to pause the DMA
	 * but doing this can lead to miss some data as some devices don't
	 * have FIFO.
	 * We need to read several registers because:
	 * - DMA is running therefore a descriptor change is possible while
	 * reading these registers
	 * - When the block transfer is done, the value of the CUBC register
	 * is set to its initial value until the fetch of the next descriptor.
	 * This value will corrupt the residue calculation so we have to skip
	 * it.
	 *
	 * INITD --------                    ------------
	 *              |____________________|
	 *       _______________________  _______________
	 * NDA       @desc2             \/   @desc3
	 *       _______________________/\_______________
	 *       __________  ___________  _______________
	 * CUBC       0    \/ MAX desc1 \/  MAX desc2
	 *       __________/\___________/\_______________
	 *
	 * Since descriptors are aligned on 64 bits, we can assume that
	 * the update of NDA and CUBC is atomic.
	 * Memory barriers are used to ensure the read order of the registers.
	 * A max number of retries is set because unlikely it could never ends.
	 */
	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
		rmb();
		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
		rmb();
		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
		rmb();
		cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
		rmb();

		if ((check_nda == cur_nda) && initd)
			break;
	}

	if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
		ret = DMA_ERROR;
		goto spin_unlock;
	}

	/*
	 * Flush FIFO: only relevant when the transfer is source peripheral
	 * synchronized. Another flush is needed here because CUBC is updated
	 * when the controller sends the data write command. It can lead to
	 * report data that are not written in the memory or the device. The
	 * FIFO flush ensures that data are really written.
	 */
	if ((desc->lld.mbr_cfg & mask) == value) {
		at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask);
		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
			cpu_relax();
	}

	/*
	 * Remove size of all microblocks already transferred and the current
	 * one. Then add the remaining size to transfer of the current
	 * microblock.
	 */
	descs_list = &desc->descs_list;
	list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
		dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
		residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
		if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
			desc = iter;
			break;
		}
	}
	residue += cur_ubc << dwidth;

	dma_set_residue(txstate, residue);

	dev_dbg(chan2dev(chan),
		 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
		 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);

spin_unlock:
	spin_unlock_irqrestore(&atchan->lock, flags);
	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);
	return ret;
}

static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
{
	struct at_xdmac_desc	*desc;

	/*
	 * If channel is enabled, do nothing, advance_work will be triggered
	 * after the interruption.
	 */
	if (at_xdmac_chan_is_enabled(atchan) || list_empty(&atchan->xfers_list))
		return;

	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
				xfer_node);
	dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
	if (!desc->active_xfer)
		at_xdmac_start_xfer(atchan, desc);
}

static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
{
	struct at_xdmac_desc		*desc;
	struct dma_async_tx_descriptor	*txd;

	spin_lock_irq(&atchan->lock);
	dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
		__func__, atchan->irq_status);
	if (list_empty(&atchan->xfers_list)) {
		spin_unlock_irq(&atchan->lock);
		return;
	}
	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
				xfer_node);
	spin_unlock_irq(&atchan->lock);
	txd = &desc->tx_dma_desc;
	if (txd->flags & DMA_PREP_INTERRUPT)
		dmaengine_desc_get_callback_invoke(txd, NULL);
}

/* Called with atchan->lock held. */
static void at_xdmac_handle_error(struct at_xdmac_chan *atchan)
{
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	struct at_xdmac_desc	*bad_desc;
	int			ret;

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return;

	/*
	 * The descriptor currently at the head of the active list is
	 * broken. Since we don't have any way to report errors, we'll
	 * just have to scream loudly and try to continue with other
	 * descriptors queued (if any).
	 */
	if (atchan->irq_status & AT_XDMAC_CIS_RBEIS)
		dev_err(chan2dev(&atchan->chan), "read bus error!!!");
	if (atchan->irq_status & AT_XDMAC_CIS_WBEIS)
		dev_err(chan2dev(&atchan->chan), "write bus error!!!");
	if (atchan->irq_status & AT_XDMAC_CIS_ROIS)
		dev_err(chan2dev(&atchan->chan), "request overflow error!!!");

	/* Channel must be disabled first as it's not done automatically */
	at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
	while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
		cpu_relax();

	bad_desc = list_first_entry(&atchan->xfers_list,
				    struct at_xdmac_desc,
				    xfer_node);

	/* Print bad descriptor's details if needed */
	dev_dbg(chan2dev(&atchan->chan),
		"%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
		__func__, &bad_desc->lld.mbr_sa, &bad_desc->lld.mbr_da,
		bad_desc->lld.mbr_ubc);

	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);

	/* Then continue with usual descriptor management */
}

static void at_xdmac_tasklet(struct tasklet_struct *t)
{
	struct at_xdmac_chan	*atchan = from_tasklet(atchan, t, tasklet);
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	struct at_xdmac_desc	*desc;
	struct dma_async_tx_descriptor *txd;
	u32			error_mask;

	if (at_xdmac_chan_is_cyclic(atchan))
		return at_xdmac_handle_cyclic(atchan);

	error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS |
		AT_XDMAC_CIS_ROIS;

	spin_lock_irq(&atchan->lock);

	dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
		__func__, atchan->irq_status);

	if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) &&
	    !(atchan->irq_status & error_mask)) {
		spin_unlock_irq(&atchan->lock);
		return;
	}

	if (atchan->irq_status & error_mask)
		at_xdmac_handle_error(atchan);

	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
				xfer_node);
	dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
	if (!desc->active_xfer) {
		dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting");
		spin_unlock_irq(&atchan->lock);
		return;
	}

	txd = &desc->tx_dma_desc;
	dma_cookie_complete(txd);
	/* Remove the transfer from the transfer list. */
	list_del(&desc->xfer_node);
	spin_unlock_irq(&atchan->lock);

	if (txd->flags & DMA_PREP_INTERRUPT)
		dmaengine_desc_get_callback_invoke(txd, NULL);

	dma_run_dependencies(txd);

	spin_lock_irq(&atchan->lock);
	/* Move the xfer descriptors into the free descriptors list. */
	list_splice_tail_init(&desc->descs_list, &atchan->free_descs_list);
	at_xdmac_advance_work(atchan);
	spin_unlock_irq(&atchan->lock);

	/*
	 * Decrement runtime PM ref counter incremented in
	 * at_xdmac_start_xfer().
	 */
	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);
}

static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
{
	struct at_xdmac		*atxdmac = (struct at_xdmac *)dev_id;
	struct at_xdmac_chan	*atchan;
	u32			imr, status, pending;
	u32			chan_imr, chan_status;
	int			i, ret = IRQ_NONE;

	do {
		imr = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
		status = at_xdmac_read(atxdmac, AT_XDMAC_GIS);
		pending = status & imr;

		dev_vdbg(atxdmac->dma.dev,
			 "%s: status=0x%08x, imr=0x%08x, pending=0x%08x\n",
			 __func__, status, imr, pending);

		if (!pending)
			break;

		/* We have to find which channel has generated the interrupt. */
		for (i = 0; i < atxdmac->dma.chancnt; i++) {
			if (!((1 << i) & pending))
				continue;

			atchan = &atxdmac->chan[i];
			chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
			chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS);
			atchan->irq_status = chan_status & chan_imr;
			dev_vdbg(atxdmac->dma.dev,
				 "%s: chan%d: imr=0x%x, status=0x%x\n",
				 __func__, i, chan_imr, chan_status);
			dev_vdbg(chan2dev(&atchan->chan),
				 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
				 __func__,
				 at_xdmac_chan_read(atchan, AT_XDMAC_CC),
				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
				 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
				 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
				 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));

			if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
				at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);

			tasklet_schedule(&atchan->tasklet);
			ret = IRQ_HANDLED;
		}

	} while (pending);

	return ret;
}

static void at_xdmac_issue_pending(struct dma_chan *chan)
{
	struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
	unsigned long flags;

	dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);

	spin_lock_irqsave(&atchan->lock, flags);
	at_xdmac_advance_work(atchan);
	spin_unlock_irqrestore(&atchan->lock, flags);

	return;
}

static int at_xdmac_device_config(struct dma_chan *chan,
				  struct dma_slave_config *config)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	int ret;
	unsigned long		flags;

	dev_dbg(chan2dev(chan), "%s\n", __func__);

	spin_lock_irqsave(&atchan->lock, flags);
	ret = at_xdmac_set_slave_config(chan, config);
	spin_unlock_irqrestore(&atchan->lock, flags);

	return ret;
}

static void at_xdmac_device_pause_set(struct at_xdmac *atxdmac,
				      struct at_xdmac_chan *atchan)
{
	at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask);
	while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) &
	       (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP))
		cpu_relax();
}

static void at_xdmac_device_pause_internal(struct at_xdmac_chan *atchan)
{
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	unsigned long		flags;

	spin_lock_irqsave(&atchan->lock, flags);
	set_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status);
	at_xdmac_device_pause_set(atxdmac, atchan);
	spin_unlock_irqrestore(&atchan->lock, flags);
}

static int at_xdmac_device_pause(struct dma_chan *chan)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	unsigned long		flags;
	int			ret;

	dev_dbg(chan2dev(chan), "%s\n", __func__);

	if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status))
		return 0;

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return ret;

	spin_lock_irqsave(&atchan->lock, flags);

	at_xdmac_device_pause_set(atxdmac, atchan);
	/* Decrement runtime PM ref counter for each active descriptor. */
	at_xdmac_runtime_suspend_descriptors(atchan);

	spin_unlock_irqrestore(&atchan->lock, flags);

	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);

	return 0;
}

static void at_xdmac_device_resume_internal(struct at_xdmac_chan *atchan)
{
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	unsigned long		flags;

	spin_lock_irqsave(&atchan->lock, flags);
	at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask);
	clear_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status);
	spin_unlock_irqrestore(&atchan->lock, flags);
}

static int at_xdmac_device_resume(struct dma_chan *chan)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	unsigned long		flags;
	int			ret;

	dev_dbg(chan2dev(chan), "%s\n", __func__);

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return ret;

	spin_lock_irqsave(&atchan->lock, flags);
	if (!at_xdmac_chan_is_paused(atchan))
		goto unlock;

	/* Increment runtime PM ref counter for each active descriptor. */
	ret = at_xdmac_runtime_resume_descriptors(atchan);
	if (ret < 0)
		goto unlock;

	at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask);
	clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);

unlock:
	spin_unlock_irqrestore(&atchan->lock, flags);
	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);

	return ret;
}

static int at_xdmac_device_terminate_all(struct dma_chan *chan)
{
	struct at_xdmac_desc	*desc, *_desc;
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
	unsigned long		flags;
	int			ret;

	dev_dbg(chan2dev(chan), "%s\n", __func__);

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return ret;

	spin_lock_irqsave(&atchan->lock, flags);
	at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
	while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
		cpu_relax();

	/* Cancel all pending transfers. */
	list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
		list_del(&desc->xfer_node);
		list_splice_tail_init(&desc->descs_list,
				      &atchan->free_descs_list);
		/*
		 * We incremented the runtime PM reference count on
		 * at_xdmac_start_xfer() for this descriptor. Now it's time
		 * to release it.
		 */
		if (desc->active_xfer) {
			pm_runtime_put_autosuspend(atxdmac->dev);
			pm_runtime_mark_last_busy(atxdmac->dev);
		}
	}

	clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
	clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
	spin_unlock_irqrestore(&atchan->lock, flags);

	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);

	return 0;
}

static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac_desc	*desc;
	int			i;

	if (at_xdmac_chan_is_enabled(atchan)) {
		dev_err(chan2dev(chan),
			"can't allocate channel resources (channel enabled)\n");
		return -EIO;
	}

	if (!list_empty(&atchan->free_descs_list)) {
		dev_err(chan2dev(chan),
			"can't allocate channel resources (channel not free from a previous use)\n");
		return -EIO;
	}

	for (i = 0; i < init_nr_desc_per_channel; i++) {
		desc = at_xdmac_alloc_desc(chan, GFP_KERNEL);
		if (!desc) {
			if (i == 0) {
				dev_warn(chan2dev(chan),
					 "can't allocate any descriptors\n");
				return -EIO;
			}
			dev_warn(chan2dev(chan),
				"only %d descriptors have been allocated\n", i);
			break;
		}
		list_add_tail(&desc->desc_node, &atchan->free_descs_list);
	}

	dma_cookie_init(chan);

	dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);

	return i;
}

static void at_xdmac_free_chan_resources(struct dma_chan *chan)
{
	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
	struct at_xdmac_desc	*desc, *_desc;

	list_for_each_entry_safe(desc, _desc, &atchan->free_descs_list, desc_node) {
		dev_dbg(chan2dev(chan), "%s: freeing descriptor %p\n", __func__, desc);
		list_del(&desc->desc_node);
		dma_pool_free(atxdmac->at_xdmac_desc_pool, desc, desc->tx_dma_desc.phys);
	}

	return;
}

static void at_xdmac_axi_config(struct platform_device *pdev)
{
	struct at_xdmac	*atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
	bool dev_m2m = false;
	u32 dma_requests;

	if (!atxdmac->layout->axi_config)
		return; /* Not supported */

	if (!of_property_read_u32(pdev->dev.of_node, "dma-requests",
				  &dma_requests)) {
		dev_info(&pdev->dev, "controller in mem2mem mode.\n");
		dev_m2m = true;
	}

	if (dev_m2m) {
		at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_M2M);
		at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_M2M);
	} else {
		at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_P2M);
		at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_P2M);
	}
}

static int __maybe_unused atmel_xdmac_prepare(struct device *dev)
{
	struct at_xdmac		*atxdmac = dev_get_drvdata(dev);
	struct dma_chan		*chan, *_chan;

	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);

		/* Wait for transfer completion, except in cyclic case. */
		if (at_xdmac_chan_is_enabled(atchan) && !at_xdmac_chan_is_cyclic(atchan))
			return -EAGAIN;
	}
	return 0;
}

static int __maybe_unused atmel_xdmac_suspend(struct device *dev)
{
	struct at_xdmac		*atxdmac = dev_get_drvdata(dev);
	struct dma_chan		*chan, *_chan;
	int			ret;

	ret = pm_runtime_resume_and_get(atxdmac->dev);
	if (ret < 0)
		return ret;

	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);

		atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC);
		if (at_xdmac_chan_is_cyclic(atchan)) {
			if (!at_xdmac_chan_is_paused(atchan)) {
				dev_warn(chan2dev(chan), "%s: channel %d not paused\n",
					 __func__, chan->chan_id);
				at_xdmac_device_pause_internal(atchan);
				at_xdmac_runtime_suspend_descriptors(atchan);
			}
			atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
			atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA);
			atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC);
		}
	}
	atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
	atxdmac->save_gs = at_xdmac_read(atxdmac, AT_XDMAC_GS);

	at_xdmac_off(atxdmac, false);
	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_noidle(atxdmac->dev);
	clk_disable_unprepare(atxdmac->clk);

	return 0;
}

static int __maybe_unused atmel_xdmac_resume(struct device *dev)
{
	struct at_xdmac		*atxdmac = dev_get_drvdata(dev);
	struct at_xdmac_chan	*atchan;
	struct dma_chan		*chan, *_chan;
	struct platform_device	*pdev = container_of(dev, struct platform_device, dev);
	int			i, ret;

	ret = clk_prepare_enable(atxdmac->clk);
	if (ret)
		return ret;

	pm_runtime_get_noresume(atxdmac->dev);

	at_xdmac_axi_config(pdev);

	/* Clear pending interrupts. */
	for (i = 0; i < atxdmac->dma.chancnt; i++) {
		atchan = &atxdmac->chan[i];
		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
			cpu_relax();
	}

	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atxdmac->save_gim);
	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
		atchan = to_at_xdmac_chan(chan);

		at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc);
		if (at_xdmac_chan_is_cyclic(atchan)) {
			/*
			 * Resume only channels not explicitly paused by
			 * consumers.
			 */
			if (at_xdmac_chan_is_paused_internal(atchan)) {
				ret = at_xdmac_runtime_resume_descriptors(atchan);
				if (ret < 0)
					return ret;
				at_xdmac_device_resume_internal(atchan);
			}

			/*
			 * We may resume from a deep sleep state where power
			 * to DMA controller is cut-off. Thus, restore the
			 * suspend state of channels set though dmaengine API.
			 */
			else if (at_xdmac_chan_is_paused(atchan))
				at_xdmac_device_pause_set(atxdmac, atchan);

			at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda);
			at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc);
			at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim);
			wmb();
			if (atxdmac->save_gs & atchan->mask)
				at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
		}
	}

	pm_runtime_mark_last_busy(atxdmac->dev);
	pm_runtime_put_autosuspend(atxdmac->dev);

	return 0;
}

static int __maybe_unused atmel_xdmac_runtime_suspend(struct device *dev)
{
	struct at_xdmac *atxdmac = dev_get_drvdata(dev);

	clk_disable(atxdmac->clk);

	return 0;
}

static int __maybe_unused atmel_xdmac_runtime_resume(struct device *dev)
{
	struct at_xdmac *atxdmac = dev_get_drvdata(dev);

	return clk_enable(atxdmac->clk);
}

static int at_xdmac_probe(struct platform_device *pdev)
{
	struct at_xdmac	*atxdmac;
	int		irq, nr_channels, i, ret;
	void __iomem	*base;
	u32		reg;

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(base))
		return PTR_ERR(base);

	/*
	 * Read number of xdmac channels, read helper function can't be used
	 * since atxdmac is not yet allocated and we need to know the number
	 * of channels to do the allocation.
	 */
	reg = readl_relaxed(base + AT_XDMAC_GTYPE);
	nr_channels = AT_XDMAC_NB_CH(reg);
	if (nr_channels > AT_XDMAC_MAX_CHAN) {
		dev_err(&pdev->dev, "invalid number of channels (%u)\n",
			nr_channels);
		return -EINVAL;
	}

	atxdmac = devm_kzalloc(&pdev->dev,
			       struct_size(atxdmac, chan, nr_channels),
			       GFP_KERNEL);
	if (!atxdmac) {
		dev_err(&pdev->dev, "can't allocate at_xdmac structure\n");
		return -ENOMEM;
	}

	atxdmac->regs = base;
	atxdmac->irq = irq;
	atxdmac->dev = &pdev->dev;

	atxdmac->layout = of_device_get_match_data(&pdev->dev);
	if (!atxdmac->layout)
		return -ENODEV;

	atxdmac->clk = devm_clk_get(&pdev->dev, "dma_clk");
	if (IS_ERR(atxdmac->clk)) {
		dev_err(&pdev->dev, "can't get dma_clk\n");
		return PTR_ERR(atxdmac->clk);
	}

	/* Do not use dev res to prevent races with tasklet */
	ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac);
	if (ret) {
		dev_err(&pdev->dev, "can't request irq\n");
		return ret;
	}

	ret = clk_prepare_enable(atxdmac->clk);
	if (ret) {
		dev_err(&pdev->dev, "can't prepare or enable clock\n");
		goto err_free_irq;
	}

	atxdmac->at_xdmac_desc_pool =
		dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
				sizeof(struct at_xdmac_desc), 4, 0);
	if (!atxdmac->at_xdmac_desc_pool) {
		dev_err(&pdev->dev, "no memory for descriptors dma pool\n");
		ret = -ENOMEM;
		goto err_clk_disable;
	}

	dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask);
	dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask);
	dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
	dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask);
	dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask);
	dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
	/*
	 * Without DMA_PRIVATE the driver is not able to allocate more than
	 * one channel, second allocation fails in private_candidate.
	 */
	dma_cap_set(DMA_PRIVATE, atxdmac->dma.cap_mask);
	atxdmac->dma.dev				= &pdev->dev;
	atxdmac->dma.device_alloc_chan_resources	= at_xdmac_alloc_chan_resources;
	atxdmac->dma.device_free_chan_resources		= at_xdmac_free_chan_resources;
	atxdmac->dma.device_tx_status			= at_xdmac_tx_status;
	atxdmac->dma.device_issue_pending		= at_xdmac_issue_pending;
	atxdmac->dma.device_prep_dma_cyclic		= at_xdmac_prep_dma_cyclic;
	atxdmac->dma.device_prep_interleaved_dma	= at_xdmac_prep_interleaved;
	atxdmac->dma.device_prep_dma_memcpy		= at_xdmac_prep_dma_memcpy;
	atxdmac->dma.device_prep_dma_memset		= at_xdmac_prep_dma_memset;
	atxdmac->dma.device_prep_dma_memset_sg		= at_xdmac_prep_dma_memset_sg;
	atxdmac->dma.device_prep_slave_sg		= at_xdmac_prep_slave_sg;
	atxdmac->dma.device_config			= at_xdmac_device_config;
	atxdmac->dma.device_pause			= at_xdmac_device_pause;
	atxdmac->dma.device_resume			= at_xdmac_device_resume;
	atxdmac->dma.device_terminate_all		= at_xdmac_device_terminate_all;
	atxdmac->dma.src_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
	atxdmac->dma.dst_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
	atxdmac->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
	atxdmac->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;

	platform_set_drvdata(pdev, atxdmac);

	pm_runtime_set_autosuspend_delay(&pdev->dev, 500);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_set_active(&pdev->dev);
	pm_runtime_enable(&pdev->dev);
	pm_runtime_get_noresume(&pdev->dev);

	/* Init channels. */
	INIT_LIST_HEAD(&atxdmac->dma.channels);

	/* Disable all chans and interrupts. */
	at_xdmac_off(atxdmac, true);

	for (i = 0; i < nr_channels; i++) {
		struct at_xdmac_chan *atchan = &atxdmac->chan[i];

		atchan->chan.device = &atxdmac->dma;
		list_add_tail(&atchan->chan.device_node,
			      &atxdmac->dma.channels);

		atchan->ch_regs = at_xdmac_chan_reg_base(atxdmac, i);
		atchan->mask = 1 << i;

		spin_lock_init(&atchan->lock);
		INIT_LIST_HEAD(&atchan->xfers_list);
		INIT_LIST_HEAD(&atchan->free_descs_list);
		tasklet_setup(&atchan->tasklet, at_xdmac_tasklet);

		/* Clear pending interrupts. */
		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
			cpu_relax();
	}

	ret = dma_async_device_register(&atxdmac->dma);
	if (ret) {
		dev_err(&pdev->dev, "fail to register DMA engine device\n");
		goto err_pm_disable;
	}

	ret = of_dma_controller_register(pdev->dev.of_node,
					 at_xdmac_xlate, atxdmac);
	if (ret) {
		dev_err(&pdev->dev, "could not register of dma controller\n");
		goto err_dma_unregister;
	}

	dev_info(&pdev->dev, "%d channels, mapped at 0x%p\n",
		 nr_channels, atxdmac->regs);

	at_xdmac_axi_config(pdev);

	pm_runtime_mark_last_busy(&pdev->dev);
	pm_runtime_put_autosuspend(&pdev->dev);

	return 0;

err_dma_unregister:
	dma_async_device_unregister(&atxdmac->dma);
err_pm_disable:
	pm_runtime_put_noidle(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	pm_runtime_set_suspended(&pdev->dev);
	pm_runtime_dont_use_autosuspend(&pdev->dev);
err_clk_disable:
	clk_disable_unprepare(atxdmac->clk);
err_free_irq:
	free_irq(atxdmac->irq, atxdmac);
	return ret;
}

static int at_xdmac_remove(struct platform_device *pdev)
{
	struct at_xdmac	*atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
	int		i;

	at_xdmac_off(atxdmac, true);
	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&atxdmac->dma);
	pm_runtime_disable(atxdmac->dev);
	pm_runtime_set_suspended(&pdev->dev);
	pm_runtime_dont_use_autosuspend(&pdev->dev);
	clk_disable_unprepare(atxdmac->clk);

	free_irq(atxdmac->irq, atxdmac);

	for (i = 0; i < atxdmac->dma.chancnt; i++) {
		struct at_xdmac_chan *atchan = &atxdmac->chan[i];

		tasklet_kill(&atchan->tasklet);
		at_xdmac_free_chan_resources(&atchan->chan);
	}

	return 0;
}

static const struct dev_pm_ops __maybe_unused atmel_xdmac_dev_pm_ops = {
	.prepare	= atmel_xdmac_prepare,
	SET_LATE_SYSTEM_SLEEP_PM_OPS(atmel_xdmac_suspend, atmel_xdmac_resume)
	SET_RUNTIME_PM_OPS(atmel_xdmac_runtime_suspend,
			   atmel_xdmac_runtime_resume, NULL)
};

static const struct of_device_id atmel_xdmac_dt_ids[] = {
	{
		.compatible = "atmel,sama5d4-dma",
		.data = &at_xdmac_sama5d4_layout,
	}, {
		.compatible = "microchip,sama7g5-dma",
		.data = &at_xdmac_sama7g5_layout,
	}, {
		/* sentinel */
	}
};
MODULE_DEVICE_TABLE(of, atmel_xdmac_dt_ids);

static struct platform_driver at_xdmac_driver = {
	.probe		= at_xdmac_probe,
	.remove		= at_xdmac_remove,
	.driver = {
		.name		= "at_xdmac",
		.of_match_table	= of_match_ptr(atmel_xdmac_dt_ids),
		.pm		= pm_ptr(&atmel_xdmac_dev_pm_ops),
	}
};

static int __init at_xdmac_init(void)
{
	return platform_driver_register(&at_xdmac_driver);
}
subsys_initcall(at_xdmac_init);

static void __exit at_xdmac_exit(void)
{
	platform_driver_unregister(&at_xdmac_driver);
}
module_exit(at_xdmac_exit);

MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
MODULE_LICENSE("GPL");