bcm27xx: sync 5.4 patches with RPi Foundation

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
master
Álvaro Fernández Rojas 4 years ago
parent d890f85e59
commit 0f6d04457a

@ -184,7 +184,6 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_FREEZER=y
# CONFIG_FSL_QDMA is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_MBCACHE=y
CONFIG_FS_POSIX_ACL=y
@ -286,10 +285,7 @@ CONFIG_MAX_RAW_DEVS=256
CONFIG_MEMFD_CREATE=y
CONFIG_MEMORY_ISOLATION=y
CONFIG_MFD_CORE=y
# CONFIG_MFD_LOCHNAGAR is not set
# CONFIG_MFD_ROHM_BD70528 is not set
# CONFIG_MFD_RPISENSE_CORE is not set
# CONFIG_MFD_STPMIC1 is not set
CONFIG_MFD_SYSCON=y
CONFIG_MIGHT_HAVE_CACHE_L2X0=y
CONFIG_MIGRATION=y

@ -30,14 +30,12 @@ CONFIG_ARCH_HAVE_CUSTOM_GPIO_H=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_KEEP_MEMBLOCK=y
CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y
# CONFIG_ARCH_MILBEAUT is not set
CONFIG_ARCH_MULTIPLATFORM=y
CONFIG_ARCH_MULTI_V6_V7=y
CONFIG_ARCH_MULTI_V7=y
CONFIG_ARCH_NR_GPIO=0
CONFIG_ARCH_OPTIONAL_KERNEL_RWX=y
CONFIG_ARCH_OPTIONAL_KERNEL_RWX_DEFAULT=y
# CONFIG_ARCH_RDA is not set
CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
@ -52,9 +50,6 @@ CONFIG_ARM_ARCH_TIMER=y
CONFIG_ARM_ARCH_TIMER_EVTSTREAM=y
# CONFIG_ARM_BCM2835_CPUFREQ is not set
CONFIG_ARM_CPU_SUSPEND=y
# CONFIG_ARM_ERRATA_814220 is not set
# CONFIG_ARM_ERRATA_857271 is not set
# CONFIG_ARM_ERRATA_857272 is not set
CONFIG_ARM_GIC=y
CONFIG_ARM_HAS_SG_CHAIN=y
CONFIG_ARM_L1_CACHE_SHIFT=6
@ -212,7 +207,6 @@ CONFIG_DUMMY_CONSOLE=y
CONFIG_EDAC_ATOMIC_SCRUB=y
CONFIG_EDAC_SUPPORT=y
CONFIG_ENABLE_MUST_CHECK=y
# CONFIG_ENERGY_MODEL is not set
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@ -242,7 +236,6 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_FREEZER=y
# CONFIG_FSL_QDMA is not set
CONFIG_FS_ENCRYPTION=y
CONFIG_FS_IOMAP=y
CONFIG_FS_MBCACHE=y
@ -277,7 +270,6 @@ CONFIG_GPIOLIB_IRQCHIP=y
CONFIG_GPIO_BCM_VIRT=y
CONFIG_GPIO_RASPBERRYPI_EXP=y
CONFIG_GPIO_SYSFS=y
# CONFIG_GVE is not set
CONFIG_HANDLE_DOMAIN_IRQ=y
CONFIG_HARDEN_BRANCH_PREDICTOR=y
CONFIG_HARDIRQS_SW_RESEND=y
@ -370,10 +362,7 @@ CONFIG_MDIO_DEVICE=y
CONFIG_MEMFD_CREATE=y
CONFIG_MEMORY_ISOLATION=y
CONFIG_MFD_CORE=y
# CONFIG_MFD_LOCHNAGAR is not set
# CONFIG_MFD_ROHM_BD70528 is not set
# CONFIG_MFD_RPISENSE_CORE is not set
# CONFIG_MFD_STPMIC1 is not set
CONFIG_MFD_SYSCON=y
CONFIG_MICROCHIP_PHY=y
CONFIG_MIGHT_HAVE_CACHE_L2X0=y
@ -427,11 +416,9 @@ CONFIG_PCI=y
CONFIG_PCIEAER=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCIE_BRCMSTB=y
# CONFIG_PCIE_BW is not set
CONFIG_PCIE_PME=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_DOMAINS_GENERIC=y
# CONFIG_PCI_MESON is not set
CONFIG_PCI_MSI=y
CONFIG_PCI_MSI_IRQ_DOMAIN=y
CONFIG_PERF_USE_VMALLOC=y
@ -512,7 +499,6 @@ CONFIG_TIMER_PROBE=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TREE_RCU=y
CONFIG_TREE_SRCU=y
# CONFIG_TRUSTED_FOUNDATIONS is not set
CONFIG_UEVENT_HELPER_PATH=""
# CONFIG_UID16 is not set
CONFIG_UNCOMPRESS_INCLUDE="debug/uncompress.h"

@ -157,6 +157,7 @@ CONFIG_BLK_DEV_SD=y
CONFIG_BLK_MQ_PCI=y
CONFIG_BLK_PM=y
CONFIG_BLK_SCSI_REQUEST=y
CONFIG_BOUNCE=y
CONFIG_BRCMSTB_THERMAL=y
CONFIG_BRCM_CHAR_DRIVERS=y
CONFIG_CAVIUM_ERRATUM_22375=y
@ -253,7 +254,6 @@ CONFIG_DUMMY_CONSOLE=y
CONFIG_EDAC_SUPPORT=y
CONFIG_EFI_EARLYCON=y
CONFIG_ENABLE_MUST_CHECK=y
# CONFIG_ENERGY_MODEL is not set
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@ -283,7 +283,6 @@ CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_FRAME_POINTER=y
CONFIG_FREEZER=y
CONFIG_FSL_ERRATUM_A008585=y
# CONFIG_FSL_QDMA is not set
CONFIG_FS_ENCRYPTION=y
CONFIG_FS_IOMAP=y
CONFIG_FS_MBCACHE=y
@ -323,7 +322,6 @@ CONFIG_GPIOLIB_IRQCHIP=y
CONFIG_GPIO_BCM_VIRT=y
CONFIG_GPIO_RASPBERRYPI_EXP=y
CONFIG_GPIO_SYSFS=y
# CONFIG_GVE is not set
CONFIG_HANDLE_DOMAIN_IRQ=y
CONFIG_HARDEN_BRANCH_PREDICTOR=y
CONFIG_HARDIRQS_SW_RESEND=y
@ -446,10 +444,7 @@ CONFIG_MEMFD_CREATE=y
# CONFIG_MEMORY_HOTPLUG is not set
CONFIG_MEMORY_ISOLATION=y
CONFIG_MFD_CORE=y
# CONFIG_MFD_LOCHNAGAR is not set
# CONFIG_MFD_ROHM_BD70528 is not set
# CONFIG_MFD_RPISENSE_CORE is not set
# CONFIG_MFD_STPMIC1 is not set
CONFIG_MFD_SYSCON=y
CONFIG_MICROCHIP_PHY=y
CONFIG_MIGRATION=y
@ -481,7 +476,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=4
# CONFIG_NUMA is not set
CONFIG_NVMEM=y
# CONFIG_NVMEM_REBOOT_MODE is not set
# CONFIG_OCTEONTX2_AF is not set
CONFIG_OF=y
CONFIG_OF_ADDRESS=y
@ -503,7 +497,6 @@ CONFIG_PCI=y
# CONFIG_PCIE_BRCMSTB is not set
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_DOMAINS_GENERIC=y
# CONFIG_PCI_MESON is not set
CONFIG_PCI_MSI=y
CONFIG_PCI_MSI_IRQ_DOMAIN=y
CONFIG_PGTABLE_LEVELS=3

@ -160,6 +160,7 @@ CONFIG_BLK_DEV_SD=y
CONFIG_BLK_MQ_PCI=y
CONFIG_BLK_PM=y
CONFIG_BLK_SCSI_REQUEST=y
CONFIG_BOUNCE=y
CONFIG_BRCMSTB_THERMAL=y
CONFIG_BRCM_CHAR_DRIVERS=y
CONFIG_BROADCOM_PHY=y
@ -258,7 +259,6 @@ CONFIG_DUMMY_CONSOLE=y
CONFIG_EDAC_SUPPORT=y
CONFIG_EFI_EARLYCON=y
CONFIG_ENABLE_MUST_CHECK=y
# CONFIG_ENERGY_MODEL is not set
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@ -288,7 +288,6 @@ CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_FRAME_POINTER=y
CONFIG_FREEZER=y
CONFIG_FSL_ERRATUM_A008585=y
# CONFIG_FSL_QDMA is not set
CONFIG_FS_ENCRYPTION=y
CONFIG_FS_IOMAP=y
CONFIG_FS_MBCACHE=y
@ -329,7 +328,6 @@ CONFIG_GPIOLIB_IRQCHIP=y
CONFIG_GPIO_BCM_VIRT=y
CONFIG_GPIO_RASPBERRYPI_EXP=y
CONFIG_GPIO_SYSFS=y
# CONFIG_GVE is not set
CONFIG_HANDLE_DOMAIN_IRQ=y
CONFIG_HARDEN_BRANCH_PREDICTOR=y
CONFIG_HARDIRQS_SW_RESEND=y
@ -453,10 +451,7 @@ CONFIG_MEMFD_CREATE=y
# CONFIG_MEMORY_HOTPLUG is not set
CONFIG_MEMORY_ISOLATION=y
CONFIG_MFD_CORE=y
# CONFIG_MFD_LOCHNAGAR is not set
# CONFIG_MFD_ROHM_BD70528 is not set
# CONFIG_MFD_RPISENSE_CORE is not set
# CONFIG_MFD_STPMIC1 is not set
CONFIG_MFD_SYSCON=y
CONFIG_MIGRATION=y
CONFIG_MMC=y
@ -487,7 +482,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=4
# CONFIG_NUMA is not set
CONFIG_NVMEM=y
# CONFIG_NVMEM_REBOOT_MODE is not set
# CONFIG_OCTEONTX2_AF is not set
CONFIG_OF=y
CONFIG_OF_ADDRESS=y
@ -509,11 +503,9 @@ CONFIG_PCIEAER=y
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIE_AL is not set
CONFIG_PCIE_BRCMSTB=y
# CONFIG_PCIE_BW is not set
CONFIG_PCIE_PME=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_DOMAINS_GENERIC=y
# CONFIG_PCI_MESON is not set
CONFIG_PCI_MSI=y
CONFIG_PCI_MSI_IRQ_DOMAIN=y
CONFIG_PGTABLE_LEVELS=3

@ -1,569 +0,0 @@
From d45590eb858ac7a2578d477791881ba7ffb1e615 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Tue, 19 Feb 2019 22:06:59 +0000
Subject: [PATCH] PCI: brcmstb: Add dma-range mapping for inbound
traffic
The Broadcom STB PCIe host controller is intimately related to the
memory subsystem. This close relationship adds complexity to how cpu
system memory is mapped to PCIe memory. Ideally, this mapping is an
identity mapping, or an identity mapping off by a constant. Not so in
this case.
Consider the Broadcom reference board BCM97445LCC_4X8 which has 6 GB
of system memory. Here is how the PCIe controller maps the
system memory to PCIe memory:
memc0-a@[ 0....3fffffff] <=> pci@[ 0....3fffffff]
memc0-b@[100000000...13fffffff] <=> pci@[ 40000000....7fffffff]
memc1-a@[ 40000000....7fffffff] <=> pci@[ 80000000....bfffffff]
memc1-b@[300000000...33fffffff] <=> pci@[ c0000000....ffffffff]
memc2-a@[ 80000000....bfffffff] <=> pci@[100000000...13fffffff]
memc2-b@[c00000000...c3fffffff] <=> pci@[140000000...17fffffff]
Although there are some "gaps" that can be added between the
individual mappings by software, the permutation of memory regions for
the most part is fixed by HW. The solution of having something close
to an identity mapping is not possible.
The idea behind this HW design is that the same PCIe module can
act as an RC or EP, and if it acts as an EP it concatenates all
of system memory into a BAR so anything can be accessed. Unfortunately,
when the PCIe block is in the role of an RC it also presents this
"BAR" to downstream PCIe devices, rather than offering an identity map
between its system memory and PCIe space.
Suppose that an endpoint driver allocs some DMA memory. Suppose this
memory is located at 0x6000_0000, which is in the middle of memc1-a.
The driver wants a dma_addr_t value that it can pass on to the EP to
use. Without doing any custom mapping, the EP will use this value for
DMA: the driver will get a dma_addr_t equal to 0x6000_0000. But this
won't work; the device needs a dma_addr_t that reflects the PCIe space
address, namely 0xa000_0000.
So, essentially the solution to this problem must modify the
dma_addr_t returned by the DMA routines. There are two
ways (I know of) of doing this:
(a) overriding/redefining the dma_to_phys() and phys_to_dma() calls
that are used by the dma_ops routines. This is the approach of
arch/mips/cavium-octeon/dma-octeon.c
In ARM and ARM64 these two routines are defined in asm/dma-mapping.h
as static inline functions.
(b) Subscribe to a notifier that notifies when a device is added to a
bus. When this happens, set_dma_ops() can be called for the device.
This method is mentioned in:
http://lxr.free-electrons.com/source/drivers/of/platform.c?v=3.16#L152
where it says as a comment
"In case if platform code need to use own special DMA
configuration, it can use Platform bus notifier and
handle BUS_NOTIFY_ADD_DEVICE event to fix up DMA
configuration."
Solution (b) is what this commit does. It uses its own set of
dma_ops which are wrappers around the arch_dma_ops. The
wrappers translate the dma addresses before/after invoking
the arch_dma_ops, as appropriate.
Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
---
drivers/pci/controller/pcie-brcmstb.c | 420 +++++++++++++++++++++++++-
1 file changed, 411 insertions(+), 9 deletions(-)
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -4,6 +4,7 @@
#include <linux/clk.h>
#include <linux/compiler.h>
#include <linux/delay.h>
+#include <linux/dma-mapping.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -319,11 +320,307 @@ static struct pci_ops brcm_pcie_ops = {
((val & ~reg##_##field##_MASK) | \
(reg##_##field##_MASK & (field_val << reg##_##field##_SHIFT)))
+static const struct dma_map_ops *arch_dma_ops;
+static const struct dma_map_ops *brcm_dma_ops_ptr;
+static struct of_pci_range *dma_ranges;
+static int num_dma_ranges;
+
static phys_addr_t scb_size[BRCM_MAX_SCB];
static int num_memc;
static int num_pcie;
static DEFINE_MUTEX(brcm_pcie_lock);
+static dma_addr_t brcm_to_pci(dma_addr_t addr)
+{
+ struct of_pci_range *p;
+
+ if (!num_dma_ranges)
+ return addr;
+
+ for (p = dma_ranges; p < &dma_ranges[num_dma_ranges]; p++)
+ if (addr >= p->cpu_addr && addr < (p->cpu_addr + p->size))
+ return addr - p->cpu_addr + p->pci_addr;
+
+ return addr;
+}
+
+static dma_addr_t brcm_to_cpu(dma_addr_t addr)
+{
+ struct of_pci_range *p;
+
+ if (!num_dma_ranges)
+ return addr;
+
+ for (p = dma_ranges; p < &dma_ranges[num_dma_ranges]; p++)
+ if (addr >= p->pci_addr && addr < (p->pci_addr + p->size))
+ return addr - p->pci_addr + p->cpu_addr;
+
+ return addr;
+}
+
+static void *brcm_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+ gfp_t gfp, unsigned long attrs)
+{
+ void *ret;
+
+ ret = arch_dma_ops->alloc(dev, size, handle, gfp, attrs);
+ if (ret)
+ *handle = brcm_to_pci(*handle);
+ return ret;
+}
+
+static void brcm_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, unsigned long attrs)
+{
+ handle = brcm_to_cpu(handle);
+ arch_dma_ops->free(dev, size, cpu_addr, handle, attrs);
+}
+
+static int brcm_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ dma_addr = brcm_to_cpu(dma_addr);
+ return arch_dma_ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+static int brcm_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t handle, size_t size,
+ unsigned long attrs)
+{
+ handle = brcm_to_cpu(handle);
+ return arch_dma_ops->get_sgtable(dev, sgt, cpu_addr, handle, size,
+ attrs);
+}
+
+static dma_addr_t brcm_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ return brcm_to_pci(arch_dma_ops->map_page(dev, page, offset, size,
+ dir, attrs));
+}
+
+static void brcm_unmap_page(struct device *dev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ handle = brcm_to_cpu(handle);
+ arch_dma_ops->unmap_page(dev, handle, size, dir, attrs);
+}
+
+static int brcm_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ int i, j;
+ struct scatterlist *sg;
+
+ for_each_sg(sgl, sg, nents, i) {
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
+ sg->dma_address =
+ brcm_dma_ops_ptr->map_page(dev, sg_page(sg), sg->offset,
+ sg->length, dir, attrs);
+ if (dma_mapping_error(dev, sg->dma_address))
+ goto bad_mapping;
+ }
+ return nents;
+
+bad_mapping:
+ for_each_sg(sgl, sg, i, j)
+ brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
+ sg_dma_len(sg), dir, attrs);
+ return 0;
+}
+
+static void brcm_unmap_sg(struct device *dev,
+ struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ int i;
+ struct scatterlist *sg;
+
+ for_each_sg(sgl, sg, nents, i)
+ brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
+ sg_dma_len(sg), dir, attrs);
+}
+
+static void brcm_sync_single_for_cpu(struct device *dev,
+ dma_addr_t handle, size_t size,
+ enum dma_data_direction dir)
+{
+ handle = brcm_to_cpu(handle);
+ arch_dma_ops->sync_single_for_cpu(dev, handle, size, dir);
+}
+
+static void brcm_sync_single_for_device(struct device *dev,
+ dma_addr_t handle, size_t size,
+ enum dma_data_direction dir)
+{
+ handle = brcm_to_cpu(handle);
+ arch_dma_ops->sync_single_for_device(dev, handle, size, dir);
+}
+
+static dma_addr_t brcm_map_resource(struct device *dev, phys_addr_t phys,
+ size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (arch_dma_ops->map_resource)
+ return brcm_to_pci(arch_dma_ops->map_resource
+ (dev, phys, size, dir, attrs));
+ return brcm_to_pci((dma_addr_t)phys);
+}
+
+static void brcm_unmap_resource(struct device *dev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (arch_dma_ops->unmap_resource)
+ arch_dma_ops->unmap_resource(dev, brcm_to_cpu(handle), size,
+ dir, attrs);
+}
+
+static void brcm_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) /* per-entry sync via the translating op */
+ brcm_dma_ops_ptr->sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+}
+
+static void brcm_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) /* per-entry sync via the translating op */
+ brcm_dma_ops_ptr->sync_single_for_device(dev,
+ sg_dma_address(sg),
+ sg->length, dir);
+}
+
+static int brcm_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return arch_dma_ops->mapping_error(dev, dma_addr);
+}
+
+static int brcm_dma_supported(struct device *dev, u64 mask)
+{
+ if (num_dma_ranges) {
+ /*
+ * It is our translated addresses that the EP will "see", so
+ * we check all of the ranges for the largest possible value.
+ */
+ int i;
+
+ for (i = 0; i < num_dma_ranges; i++)
+ if (dma_ranges[i].pci_addr + dma_ranges[i].size - 1
+ > mask)
+ return 0;
+ return 1;
+ }
+
+ return arch_dma_ops->dma_supported(dev, mask);
+}
+
+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
+static u64 brcm_get_required_mask(struct device *dev)
+{
+ return arch_dma_ops->get_required_mask(dev); /* pass through to the wrapped arch ops */
+}
+#endif /* ARCH_HAS_DMA_GET_REQUIRED_MASK */
+
+static const struct dma_map_ops brcm_dma_ops = {
+ .alloc = brcm_alloc,
+ .free = brcm_free,
+ .mmap = brcm_mmap,
+ .get_sgtable = brcm_get_sgtable,
+ .map_page = brcm_map_page,
+ .unmap_page = brcm_unmap_page,
+ .map_sg = brcm_map_sg,
+ .unmap_sg = brcm_unmap_sg,
+ .map_resource = brcm_map_resource,
+ .unmap_resource = brcm_unmap_resource,
+ .sync_single_for_cpu = brcm_sync_single_for_cpu,
+ .sync_single_for_device = brcm_sync_single_for_device,
+ .sync_sg_for_cpu = brcm_sync_sg_for_cpu,
+ .sync_sg_for_device = brcm_sync_sg_for_device,
+ .mapping_error = brcm_mapping_error,
+ .dma_supported = brcm_dma_supported,
+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
+ .get_required_mask = brcm_get_required_mask,
+#endif
+};
+
+static void brcm_set_dma_ops(struct device *dev)
+{
+ int ret;
+
+ if (IS_ENABLED(CONFIG_ARM64)) {
+ /*
+ * We are going to invoke get_dma_ops(). That
+ * function, at this point in time, invokes
+ * get_arch_dma_ops(), and for ARM64 that function
+ * returns a pointer to dummy_dma_ops. So then we'd
+ * like to call arch_setup_dma_ops(), but that isn't
+ * exported. Instead, we call of_dma_configure(),
+ * which is exported, and this calls
+ * arch_setup_dma_ops(). Once we do this the call to
+ * get_dma_ops() will work properly because
+ * dev->dma_ops will be set.
+ */
+ ret = of_dma_configure(dev, dev->of_node, true);
+ if (ret) {
+ dev_err(dev, "of_dma_configure() failed: %d\n", ret);
+ return;
+ }
+ }
+
+ arch_dma_ops = get_dma_ops(dev);
+ if (!arch_dma_ops) {
+ dev_err(dev, "failed to get arch_dma_ops\n");
+ return;
+ }
+
+ set_dma_ops(dev, &brcm_dma_ops);
+}
+
+static int brcmstb_platform_notifier(struct notifier_block *nb,
+ unsigned long event, void *__dev)
+{
+ struct device *dev = __dev;
+
+ brcm_dma_ops_ptr = &brcm_dma_ops;
+ if (event != BUS_NOTIFY_ADD_DEVICE)
+ return NOTIFY_DONE;
+
+ brcm_set_dma_ops(dev);
+ return NOTIFY_OK;
+}
+
+static struct notifier_block brcmstb_platform_nb = {
+ .notifier_call = brcmstb_platform_notifier,
+};
+
+static int brcm_register_notifier(void)
+{
+ return bus_register_notifier(&pci_bus_type, &brcmstb_platform_nb);
+}
+
+static int brcm_unregister_notifier(void)
+{
+ return bus_unregister_notifier(&pci_bus_type, &brcmstb_platform_nb);
+}
+
static u32 rd_fld(void __iomem *p, u32 mask, int shift)
{
return (bcm_readl(p) & mask) >> shift;
@@ -597,9 +894,71 @@ static inline void brcm_pcie_perst_set(s
WR_FLD_RB(pcie->base, PCIE_MISC_PCIE_CTRL, PCIE_PERSTB, !val);
}
+static int pci_dma_range_parser_init(struct of_pci_range_parser *parser,
+ struct device_node *node)
+{
+ const int na = 3, ns = 2;
+ int rlen;
+
+ parser->node = node;
+ parser->pna = of_n_addr_cells(node);
+ parser->np = parser->pna + na + ns;
+
+ parser->range = of_get_property(node, "dma-ranges", &rlen);
+ if (!parser->range)
+ return -ENOENT;
+
+ parser->end = parser->range + rlen / sizeof(__be32);
+
+ return 0;
+}
+
+static int brcm_pcie_parse_map_dma_ranges(struct brcm_pcie *pcie)
+{
+ int i;
+ struct of_pci_range_parser parser;
+ struct device_node *dn = pcie->dn;
+
+ /*
+ * Parse dma-ranges property if present. If there are multiple
+ * PCIe controllers, we only have to parse from one of them since
+ * the others will have an identical mapping.
+ */
+ if (!pci_dma_range_parser_init(&parser, dn)) {
+ unsigned int max_ranges
+ = (parser.end - parser.range) / parser.np;
+
+ dma_ranges = kcalloc(max_ranges, sizeof(struct of_pci_range),
+ GFP_KERNEL);
+ if (!dma_ranges)
+ return -ENOMEM;
+
+ for (i = 0; of_pci_range_parser_one(&parser, dma_ranges + i);
+ i++)
+ num_dma_ranges++;
+ }
+
+ for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
+ u64 size = brcmstb_memory_memc_size(i);
+
+ if (size == (u64)-1) {
+ dev_err(pcie->dev, "cannot get memc%d size\n", i);
+ return -EINVAL;
+ } else if (size) {
+ scb_size[i] = roundup_pow_of_two_64(size);
+ num_memc++;
+ } else {
+ break;
+ }
+ }
+
+ return 0;
+}
+
static int brcm_pcie_add_controller(struct brcm_pcie *pcie)
{
int i, ret = 0;
+ struct device *dev = pcie->dev;
mutex_lock(&brcm_pcie_lock);
if (num_pcie > 0) {
@@ -607,12 +966,21 @@ static int brcm_pcie_add_controller(stru
goto done;
}
+ ret = brcm_register_notifier();
+ if (ret) {
+ dev_err(dev, "failed to register pci bus notifier\n");
+ goto done;
+ }
+ ret = brcm_pcie_parse_map_dma_ranges(pcie);
+ if (ret)
+ goto done;
+
/* Determine num_memc and their sizes */
for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
u64 size = brcmstb_memory_memc_size(i);
if (size == (u64)-1) {
- dev_err(pcie->dev, "cannot get memc%d size\n", i);
+ dev_err(dev, "cannot get memc%d size\n", i);
ret = -EINVAL;
goto done;
} else if (size) {
@@ -636,8 +1004,16 @@ done:
static void brcm_pcie_remove_controller(struct brcm_pcie *pcie)
{
mutex_lock(&brcm_pcie_lock);
- if (--num_pcie == 0)
- num_memc = 0;
+ if (--num_pcie > 0)
+ goto out;
+
+ if (brcm_unregister_notifier())
+ dev_err(pcie->dev, "failed to unregister pci bus notifier\n");
+ kfree(dma_ranges);
+ dma_ranges = NULL;
+ num_dma_ranges = 0;
+ num_memc = 0;
+out:
mutex_unlock(&brcm_pcie_lock);
}
@@ -757,6 +1133,38 @@ static int brcm_pcie_setup(struct brcm_p
*/
rc_bar2_offset = 0;
+ if (dma_ranges) {
+ /*
+ * The best-case scenario is to place the inbound
+ * region in the first 4GB of pci-space, as some
+ * legacy devices can only address 32bits.
+ * We would also like to put the MSI under 4GB
+ * as well, since some devices require a 32bit
+ * MSI target address.
+ */
+ if (total_mem_size <= 0xc0000000ULL &&
+ rc_bar2_size <= 0x100000000ULL) {
+ rc_bar2_offset = 0;
+ } else {
+ /*
+ * The system memory is 4GB or larger so we
+ * cannot start the inbound region at location
+ * 0 (since we have to allow some space for
+ * outbound memory @ 3GB). So instead we
+ * start it at the 1x multiple of its size
+ */
+ rc_bar2_offset = rc_bar2_size;
+ }
+
+ } else {
+ /*
+ * Set simple configuration based on memory sizes
+ * only. We always start the viewport at address 0,
+ * and set the MSI target address accordingly.
+ */
+ rc_bar2_offset = 0;
+ }
+
tmp = lower_32_bits(rc_bar2_offset);
tmp = INSERT_FIELD(tmp, PCIE_MISC_RC_BAR2_CONFIG_LO, SIZE,
encode_ibar_size(rc_bar2_size));
@@ -967,7 +1375,6 @@ static int brcm_pcie_probe(struct platfo
struct brcm_pcie *pcie;
struct resource *res;
void __iomem *base;
- u32 tmp;
struct pci_host_bridge *bridge;
struct pci_bus *child;
@@ -984,11 +1391,6 @@ static int brcm_pcie_probe(struct platfo
return -EINVAL;
}
- if (of_property_read_u32(dn, "dma-ranges", &tmp) == 0) {
- dev_err(&pdev->dev, "cannot yet handle dma-ranges\n");
- return -EINVAL;
- }
-
data = of_id->data;
pcie->reg_offsets = data->offsets;
pcie->reg_field_info = data->reg_field_info;

@ -1,543 +0,0 @@
From b1619c83208e7b804e2c3547dbf24bb02b3be239 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Tue, 19 Feb 2019 22:06:59 +0000
Subject: [PATCH] PCI: brcmstb: Add MSI capability
This commit adds MSI to the Broadcom STB PCIe host controller. It does
not add MSIX since that functionality is not in the HW. The MSI
controller is physically located within the PCIe block, however, there
is no reason why the MSI controller could not be moved elsewhere in
the future.
Since the internal Brcmstb MSI controller is intertwined with the PCIe
controller, it is not its own platform device but rather part of the
PCIe platform device.
Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
---
drivers/pci/controller/pcie-brcmstb.c | 374 ++++++++++++++++++++++++--
1 file changed, 353 insertions(+), 21 deletions(-)
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2009 - 2017 Broadcom */
+#include <linux/bitops.h>
#include <linux/clk.h>
#include <linux/compiler.h>
#include <linux/delay.h>
@@ -9,11 +10,13 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/ioport.h>
+#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/module.h>
+#include <linux/msi.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_pci.h>
@@ -47,6 +50,9 @@
#define PCIE_MISC_RC_BAR2_CONFIG_LO 0x4034
#define PCIE_MISC_RC_BAR2_CONFIG_HI 0x4038
#define PCIE_MISC_RC_BAR3_CONFIG_LO 0x403c
+#define PCIE_MISC_MSI_BAR_CONFIG_LO 0x4044
+#define PCIE_MISC_MSI_BAR_CONFIG_HI 0x4048
+#define PCIE_MISC_MSI_DATA_CONFIG 0x404c
#define PCIE_MISC_PCIE_CTRL 0x4064
#define PCIE_MISC_PCIE_STATUS 0x4068
#define PCIE_MISC_REVISION 0x406c
@@ -55,6 +61,7 @@
#define PCIE_MISC_CPU_2_PCIE_MEM_WIN0_LIMIT_HI 0x4084
#define PCIE_MISC_HARD_PCIE_HARD_DEBUG 0x4204
#define PCIE_INTR2_CPU_BASE 0x4300
+#define PCIE_MSI_INTR2_BASE 0x4500
/*
* Broadcom Settop Box PCIe Register Field shift and mask info. The
@@ -115,6 +122,8 @@
#define BRCM_NUM_PCIE_OUT_WINS 0x4
#define BRCM_MAX_SCB 0x4
+#define BRCM_INT_PCI_MSI_NR 32
+#define BRCM_PCIE_HW_REV_33 0x0303
#define BRCM_MSI_TARGET_ADDR_LT_4GB 0x0fffffffcULL
#define BRCM_MSI_TARGET_ADDR_GT_4GB 0xffffffffcULL
@@ -203,6 +212,33 @@ struct brcm_window {
dma_addr_t size;
};
+struct brcm_msi {
+ struct device *dev;
+ void __iomem *base;
+ struct device_node *dn;
+ struct irq_domain *msi_domain;
+ struct irq_domain *inner_domain;
+ struct mutex lock; /* guards the alloc/free operations */
+ u64 target_addr;
+ int irq;
+
+ /* intr_base is the base pointer for interrupt status/set/clr regs */
+ void __iomem *intr_base;
+
+ /* intr_legacy_mask indicates how many bits are MSI interrupts */
+ u32 intr_legacy_mask;
+
+ /*
+ * intr_legacy_offset indicates bit position of MSI_01. It is
+ * to map the register bit position to a hwirq that starts at 0.
+ */
+ u32 intr_legacy_offset;
+
+ /* used indicates which MSI interrupts have been alloc'd */
+ unsigned long used;
+ unsigned int rev;
+};
+
/* Internal PCIe Host Controller Information.*/
struct brcm_pcie {
struct device *dev;
@@ -217,7 +253,10 @@ struct brcm_pcie {
int num_out_wins;
bool ssc;
int gen;
+ u64 msi_target_addr;
struct brcm_window out_wins[BRCM_NUM_PCIE_OUT_WINS];
+ struct brcm_msi *msi;
+ bool msi_internal;
unsigned int rev;
const int *reg_offsets;
const int *reg_field_info;
@@ -225,9 +264,9 @@ struct brcm_pcie {
};
struct pcie_cfg_data {
- const int *reg_field_info;
- const int *offsets;
- const enum pcie_type type;
+ const int *reg_field_info;
+ const int *offsets;
+ const enum pcie_type type;
};
static const int pcie_reg_field_info[] = {
@@ -828,6 +867,267 @@ static void brcm_pcie_set_outbound_win(s
}
}
+static struct irq_chip brcm_msi_irq_chip = {
+ .name = "Brcm_MSI",
+ .irq_mask = pci_msi_mask_irq,
+ .irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info brcm_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSIX),
+ .chip = &brcm_msi_irq_chip,
+};
+
+static void brcm_pcie_msi_isr(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct brcm_msi *msi;
+ unsigned long status, virq;
+ u32 mask, bit, hwirq;
+ struct device *dev;
+
+ chained_irq_enter(chip, desc);
+ msi = irq_desc_get_handler_data(desc);
+ mask = msi->intr_legacy_mask;
+ dev = msi->dev;
+
+ while ((status = bcm_readl(msi->intr_base + STATUS) & mask)) {
+ for_each_set_bit(bit, &status, BRCM_INT_PCI_MSI_NR) {
+ /* clear the interrupt (unsigned mask: bit may be 31) */
+ bcm_writel(BIT(bit), msi->intr_base + CLR);
+
+ /* Account for legacy interrupt offset */
+ hwirq = bit - msi->intr_legacy_offset;
+
+ virq = irq_find_mapping(msi->inner_domain, hwirq);
+ if (virq) {
+ if (msi->used & BIT(bit)) /* used is keyed by register bit */
+ generic_handle_irq(virq);
+ else
+ dev_info(dev, "unhandled MSI %d\n",
+ hwirq);
+ } else {
+ /* Unknown MSI, just clear it */
+ dev_dbg(dev, "unexpected MSI\n");
+ }
+ }
+ }
+ chained_irq_exit(chip, desc);
+}
+
+static void brcm_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ struct brcm_msi *msi = irq_data_get_irq_chip_data(data);
+ u32 temp;
+
+ msg->address_lo = lower_32_bits(msi->target_addr);
+ msg->address_hi = upper_32_bits(msi->target_addr);
+ temp = bcm_readl(msi->base + PCIE_MISC_MSI_DATA_CONFIG);
+ msg->data = ((temp >> 16) & (temp & 0xffff)) | data->hwirq;
+}
+
+static int brcm_msi_set_affinity(struct irq_data *irq_data,
+ const struct cpumask *mask, bool force)
+{
+ return -EINVAL;
+}
+
+static struct irq_chip brcm_msi_bottom_irq_chip = {
+ .name = "Brcm_MSI",
+ .irq_compose_msi_msg = brcm_compose_msi_msg,
+ .irq_set_affinity = brcm_msi_set_affinity,
+};
+
+static int brcm_msi_alloc(struct brcm_msi *msi)
+{
+ int bit, hwirq;
+
+ mutex_lock(&msi->lock);
+ bit = ~msi->used ? ffz(msi->used) : -1;
+
+ if (bit >= 0 && bit < BRCM_INT_PCI_MSI_NR) {
+ msi->used |= BIT(bit); /* unsigned long mask: no sign extension at bit 31 */
+ hwirq = bit - msi->intr_legacy_offset;
+ } else {
+ hwirq = -ENOSPC;
+ }
+
+ mutex_unlock(&msi->lock);
+ return hwirq;
+}
+
+static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq)
+{
+ mutex_lock(&msi->lock);
+ msi->used &= ~BIT(hwirq + msi->intr_legacy_offset); /* back to register bit position */
+ mutex_unlock(&msi->lock);
+}
+
+static int brcm_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct brcm_msi *msi = domain->host_data;
+ int hwirq;
+
+ hwirq = brcm_msi_alloc(msi);
+
+ if (hwirq < 0)
+ return hwirq;
+
+ irq_domain_set_info(domain, virq, (irq_hw_number_t)hwirq,
+ &brcm_msi_bottom_irq_chip, domain->host_data,
+ handle_simple_irq, NULL, NULL);
+ return 0;
+}
+
+static void brcm_irq_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct brcm_msi *msi = irq_data_get_irq_chip_data(d);
+
+ brcm_msi_free(msi, d->hwirq);
+}
+
+static void brcm_msi_set_regs(struct brcm_msi *msi)
+{
+ u32 data_val, msi_lo, msi_hi;
+
+ if (msi->rev >= BRCM_PCIE_HW_REV_33) {
+ /*
+ * ffe0 -- least sig 5 bits are 0 indicating 32 msgs
+ * 6540 -- this is our arbitrary unique data value
+ */
+ data_val = 0xffe06540;
+ } else {
+ /*
+ * fff8 -- least sig 3 bits are 0 indicating 8 msgs
+ * 6540 -- this is our arbitrary unique data value
+ */
+ data_val = 0xfff86540;
+ }
+
+ /*
+ * Make sure we are not masking MSIs. Note that MSIs can be masked,
+ * but that occurs on the PCIe EP device
+ */
+ bcm_writel(0xffffffff & msi->intr_legacy_mask,
+ msi->intr_base + MASK_CLR);
+
+ msi_lo = lower_32_bits(msi->target_addr);
+ msi_hi = upper_32_bits(msi->target_addr);
+ /*
+ * The 0 bit of PCIE_MISC_MSI_BAR_CONFIG_LO is repurposed to MSI
+ * enable, which we set to 1.
+ */
+ bcm_writel(msi_lo | 1, msi->base + PCIE_MISC_MSI_BAR_CONFIG_LO);
+ bcm_writel(msi_hi, msi->base + PCIE_MISC_MSI_BAR_CONFIG_HI);
+ bcm_writel(data_val, msi->base + PCIE_MISC_MSI_DATA_CONFIG);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+ .alloc = brcm_irq_domain_alloc,
+ .free = brcm_irq_domain_free,
+};
+
+static int brcm_allocate_domains(struct brcm_msi *msi)
+{
+ struct fwnode_handle *fwnode = of_node_to_fwnode(msi->dn);
+ struct device *dev = msi->dev;
+
+ msi->inner_domain = irq_domain_add_linear(NULL, BRCM_INT_PCI_MSI_NR,
+ &msi_domain_ops, msi);
+ if (!msi->inner_domain) {
+ dev_err(dev, "failed to create IRQ domain\n");
+ return -ENOMEM;
+ }
+
+ msi->msi_domain = pci_msi_create_irq_domain(fwnode,
+ &brcm_msi_domain_info,
+ msi->inner_domain);
+ if (!msi->msi_domain) {
+ dev_err(dev, "failed to create MSI domain\n");
+ irq_domain_remove(msi->inner_domain);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void brcm_free_domains(struct brcm_msi *msi)
+{
+ irq_domain_remove(msi->msi_domain);
+ irq_domain_remove(msi->inner_domain);
+}
+
+static void brcm_msi_remove(struct brcm_pcie *pcie)
+{
+ struct brcm_msi *msi = pcie->msi;
+
+ if (!msi)
+ return;
+ irq_set_chained_handler(msi->irq, NULL);
+ irq_set_handler_data(msi->irq, NULL);
+ brcm_free_domains(msi);
+}
+
+static int brcm_pcie_enable_msi(struct brcm_pcie *pcie)
+{
+ struct brcm_msi *msi;
+ int irq, ret;
+ struct device *dev = pcie->dev;
+
+ irq = irq_of_parse_and_map(dev->of_node, 1);
+ if (irq <= 0) {
+ dev_err(dev, "cannot map msi intr\n");
+ return -ENODEV;
+ }
+
+ msi = devm_kzalloc(dev, sizeof(struct brcm_msi), GFP_KERNEL);
+ if (!msi)
+ return -ENOMEM;
+
+ mutex_init(&msi->lock); /* taken by brcm_msi_alloc()/brcm_msi_free() */
+ msi->dev = dev;
+ msi->base = pcie->base;
+ msi->rev = pcie->rev;
+ msi->dn = pcie->dn;
+ msi->target_addr = pcie->msi_target_addr;
+ msi->irq = irq;
+
+ /* Fill in the register layout before the ISR can run and read it. */
+ if (msi->rev >= BRCM_PCIE_HW_REV_33) {
+ msi->intr_base = msi->base + PCIE_MSI_INTR2_BASE;
+ /*
+ * This version of PCIe hw has only 32 intr bits
+ * starting at bit position 0.
+ */
+ msi->intr_legacy_mask = 0xffffffff;
+ msi->intr_legacy_offset = 0x0;
+ msi->used = 0x0;
+ } else {
+ msi->intr_base = msi->base + PCIE_INTR2_CPU_BASE;
+ /*
+ * This version of PCIe hw has only 8 intr bits starting
+ * at bit position 24.
+ */
+ msi->intr_legacy_mask = 0xff000000;
+ msi->intr_legacy_offset = 24;
+ msi->used = 0x00ffffff;
+ }
+
+ ret = brcm_allocate_domains(msi);
+ if (ret)
+ return ret;
+
+ irq_set_chained_handler_and_data(msi->irq, brcm_pcie_msi_isr, msi);
+ brcm_msi_set_regs(msi);
+ pcie->msi = msi;
+
+ return 0;
+}
+
/* Configuration space read/write support */
static int cfg_index(int busnr, int devfn, int reg)
{
@@ -1072,6 +1372,7 @@ static int brcm_pcie_setup(struct brcm_p
u16 nlw, cls, lnksta;
bool ssc_good = false;
struct device *dev = pcie->dev;
+ u64 msi_target_addr;
/* Reset the bridge */
brcm_pcie_bridge_sw_init_set(pcie, 1);
@@ -1116,27 +1417,24 @@ static int brcm_pcie_setup(struct brcm_p
* The PCIe host controller by design must set the inbound
* viewport to be a contiguous arrangement of all of the
* system's memory. In addition, its size mut be a power of
- * two. To further complicate matters, the viewport must
- * start on a pcie-address that is aligned on a multiple of its
- * size. If a portion of the viewport does not represent
- * system memory -- e.g. 3GB of memory requires a 4GB viewport
- * -- we can map the outbound memory in or after 3GB and even
- * though the viewport will overlap the outbound memory the
- * controller will know to send outbound memory downstream and
- * everything else upstream.
+ * two. Further, the MSI target address must NOT be placed
+ * inside this region, as the decoding logic will consider its
+ * address to be inbound memory traffic. To further
+ * complicate matters, the viewport must start on a
+ * pcie-address that is aligned on a multiple of its size.
+ * If a portion of the viewport does not represent system
+ * memory -- e.g. 3GB of memory requires a 4GB viewport --
+ * we can map the outbound memory in or after 3GB and even
+ * though the viewport will overlap the outbound memory
+ * the controller will know to send outbound memory downstream
+ * and everything else upstream.
*/
rc_bar2_size = roundup_pow_of_two_64(total_mem_size);
- /*
- * Set simple configuration based on memory sizes
- * only. We always start the viewport at address 0.
- */
- rc_bar2_offset = 0;
-
if (dma_ranges) {
/*
* The best-case scenario is to place the inbound
- * region in the first 4GB of pci-space, as some
+ * region in the first 4GB of pcie-space, as some
* legacy devices can only address 32bits.
* We would also like to put the MSI under 4GB
* as well, since some devices require a 32bit
@@ -1145,6 +1443,14 @@ static int brcm_pcie_setup(struct brcm_p
if (total_mem_size <= 0xc0000000ULL &&
rc_bar2_size <= 0x100000000ULL) {
rc_bar2_offset = 0;
+ /* If the viewport is less than 4GB we can fit
+ * the MSI target address under 4GB. Otherwise
+ * put it right below 64GB.
+ */
+ msi_target_addr =
+ (rc_bar2_size == 0x100000000ULL)
+ ? BRCM_MSI_TARGET_ADDR_GT_4GB
+ : BRCM_MSI_TARGET_ADDR_LT_4GB;
} else {
/*
* The system memory is 4GB or larger so we
@@ -1154,8 +1460,12 @@ static int brcm_pcie_setup(struct brcm_p
* start it at the 1x multiple of its size
*/
rc_bar2_offset = rc_bar2_size;
- }
+ /* Since we are starting the viewport at 4GB or
+ * higher, put the MSI target address below 4GB
+ */
+ msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB;
+ }
} else {
/*
* Set simple configuration based on memory sizes
@@ -1163,7 +1473,12 @@ static int brcm_pcie_setup(struct brcm_p
* and set the MSI target address accordingly.
*/
rc_bar2_offset = 0;
+
+ msi_target_addr = (rc_bar2_size >= 0x100000000ULL)
+ ? BRCM_MSI_TARGET_ADDR_GT_4GB
+ : BRCM_MSI_TARGET_ADDR_LT_4GB;
}
+ pcie->msi_target_addr = msi_target_addr;
tmp = lower_32_bits(rc_bar2_offset);
tmp = INSERT_FIELD(tmp, PCIE_MISC_RC_BAR2_CONFIG_LO, SIZE,
@@ -1333,6 +1648,9 @@ static int brcm_pcie_resume(struct devic
if (ret)
return ret;
+ if (pcie->msi && pcie->msi_internal)
+ brcm_msi_set_regs(pcie->msi);
+
pcie->suspended = false;
return 0;
@@ -1340,6 +1658,7 @@ static int brcm_pcie_resume(struct devic
static void _brcm_pcie_remove(struct brcm_pcie *pcie)
{
+ brcm_msi_remove(pcie);
turn_off(pcie);
clk_disable_unprepare(pcie->clk);
clk_put(pcie->clk);
@@ -1368,7 +1687,7 @@ MODULE_DEVICE_TABLE(of, brcm_pcie_match)
static int brcm_pcie_probe(struct platform_device *pdev)
{
- struct device_node *dn = pdev->dev.of_node;
+ struct device_node *dn = pdev->dev.of_node, *msi_dn;
const struct of_device_id *of_id;
const struct pcie_cfg_data *data;
int ret;
@@ -1448,6 +1767,20 @@ static int brcm_pcie_probe(struct platfo
if (ret)
goto fail;
+ msi_dn = of_parse_phandle(pcie->dn, "msi-parent", 0);
+ /* Use the internal MSI if no msi-parent property */
+ if (!msi_dn)
+ msi_dn = pcie->dn;
+
+	if (pci_msi_enabled() && msi_dn == pcie->dn) {
+		ret = brcm_pcie_enable_msi(pcie);
+		if (ret)	/* not fatal: carry on without internal MSI */
+			dev_err(pcie->dev,
+				"probe of internal MSI failed: %d\n", ret);
+		else
+			pcie->msi_internal = true;
+	}
+
list_splice_init(&pcie->resources, &bridge->windows);
bridge->dev.parent = &pdev->dev;
bridge->busnr = 0;
@@ -1470,7 +1803,6 @@ static int brcm_pcie_probe(struct platfo
pcie->root_bus = bridge->bus;
return 0;
-
fail:
_brcm_pcie_remove(pcie);
return ret;

@ -28,17 +28,3 @@ Subject: [PATCH] config: Permit LPAE and PCIE_BRCMSTB on BCM2835
help
This enables support for the Broadcom BCM2835 and BCM2836 SoCs.
This SoC is used in the Raspberry Pi and Roku 2 devices.
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -290,9 +290,9 @@ config PCI_HYPERV_INTERFACE
config PCIE_BRCMSTB
tristate "Broadcom Brcmstb PCIe platform host driver"
- depends on ARCH_BRCMSTB || BMIPS_GENERIC
+ depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM2835
depends on OF
- depends on SOC_BRCMSTB
+ depends on SOC_BRCMSTB || ARCH_BCM2835
default ARCH_BRCMSTB || BMIPS_GENERIC
help
Adds support for Broadcom Settop Box PCIe host controller.

@ -1,25 +0,0 @@
From 235f775351e8f7e47cff1baa1284e0df95e3234e Mon Sep 17 00:00:00 2001
From: Andrei Gherzan <andrei@balena.io>
Date: Tue, 16 Jul 2019 13:28:22 +0100
Subject: [PATCH] arm64/mm: Limit the DMA zone for arm64
On RaspberryPi, only the first 1Gb can be used for DMA[1].
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2019-July/665986.html
Signed-off-by: Andrei Gherzan <andrei@balena.io>
---
arch/arm64/mm/init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -177,7 +177,7 @@ static void __init reserve_elfcorehdr(vo
static phys_addr_t __init max_zone_dma_phys(void)
{
phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
- return min(offset + (1ULL << 32), memblock_end_of_DRAM());
+ return min(offset + (1ULL << 30), memblock_end_of_DRAM());
}
#ifdef CONFIG_NUMA

@ -1,945 +0,0 @@
From 773a2db89ad2785d72b215673d87c0a51d769f61 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Thu, 4 Apr 2019 13:33:47 +0100
Subject: [PATCH] bcm2835-dma: Add proper 40-bit DMA support
The 40-bit additions are not fully tested, but it should be
capable of supporting both 40-bit memcpy on BCM2711 and regular
Lite channels on BCM2835.
Signed-off-by: Phil Elwell <phil@raspberrypi.org>
---
drivers/dma/bcm2835-dma.c | 421 ++++++++++++++-----
drivers/pci/controller/pcie-brcmstb-bounce.c | 30 +-
drivers/pci/controller/pcie-brcmstb-bounce.h | 21 +-
drivers/pci/controller/pcie-brcmstb.c | 23 +-
4 files changed, 369 insertions(+), 126 deletions(-)
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -38,6 +38,11 @@
#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14
#define BCM2835_DMA_CHAN_NAME_SIZE 8
#define BCM2835_DMA_BULK_MASK BIT(0)
+#define BCM2838_DMA_MEMCPY_CHAN 14
+
+struct bcm2835_dma_cfg_data {
+ u32 chan_40bit_mask; /* bit n set: channel n is a 40-bit channel */
+};
/**
* struct bcm2835_dmadev - BCM2835 DMA controller
@@ -52,6 +57,7 @@ struct bcm2835_dmadev {
void __iomem *base;
struct device_dma_parameters dma_parms;
dma_addr_t zero_page;
+ const struct bcm2835_dma_cfg_data *cfg_data;
};
struct bcm2835_dma_cb {
@@ -95,6 +101,7 @@ struct bcm2835_chan {
unsigned int irq_flags;
bool is_lite_channel;
+ bool is_40bit_channel;
};
struct bcm2835_desc {
@@ -184,7 +191,8 @@ struct bcm2835_desc {
#define BCM2835_DMA_DATA_TYPE_S128 16
/* Valid only for channels 0 - 14, 15 has its own base address */
-#define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */
+#define BCM2835_DMA_CHAN_SIZE 0x100
+#define BCM2835_DMA_CHAN(n) ((n) * BCM2835_DMA_CHAN_SIZE) /* Base address */
#define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n))
/* the max dma length for different channels */
@@ -195,7 +203,7 @@ struct bcm2835_desc {
#define BCM2838_DMA40_CS 0x00
#define BCM2838_DMA40_CB 0x04
#define BCM2838_DMA40_DEBUG 0x0c
-#define BCM2858_DMA40_TI 0x10
+#define BCM2838_DMA40_TI 0x10
#define BCM2838_DMA40_SRC 0x14
#define BCM2838_DMA40_SRCI 0x18
#define BCM2838_DMA40_DEST 0x1c
@@ -204,32 +212,97 @@ struct bcm2835_desc {
#define BCM2838_DMA40_NEXT_CB 0x28
#define BCM2838_DMA40_DEBUG2 0x2c
-#define BCM2838_DMA40_CS_ACTIVE BIT(0)
-#define BCM2838_DMA40_CS_END BIT(1)
+#define BCM2838_DMA40_ACTIVE BIT(0)
+#define BCM2838_DMA40_END BIT(1)
+#define BCM2838_DMA40_INT BIT(2)
+#define BCM2838_DMA40_DREQ BIT(3) /* DREQ state */
+#define BCM2838_DMA40_RD_PAUSED BIT(4) /* Reading is paused */
+#define BCM2838_DMA40_WR_PAUSED BIT(5) /* Writing is paused */
+#define BCM2838_DMA40_DREQ_PAUSED BIT(6) /* Is paused by DREQ flow control */
+#define BCM2838_DMA40_WAITING_FOR_WRITES BIT(7) /* Waiting for last write */
+#define BCM2838_DMA40_ERR BIT(10)
+#define BCM2838_DMA40_QOS(x) (((x) & 0x1f) << 16)
+#define BCM2838_DMA40_PANIC_QOS(x) (((x) & 0x1f) << 20)
+#define BCM2838_DMA40_WAIT_FOR_WRITES BIT(28)
+#define BCM2838_DMA40_DISDEBUG BIT(29)
+#define BCM2838_DMA40_ABORT BIT(30)
+#define BCM2838_DMA40_HALT BIT(31)
+#define BCM2838_DMA40_CS_FLAGS(x) ((x) & (BCM2838_DMA40_QOS(15) | \
+				   BCM2838_DMA40_PANIC_QOS(15) | \
+				   BCM2838_DMA40_WAIT_FOR_WRITES | \
+				   BCM2838_DMA40_DISDEBUG)) /* mask x to these CS bits */
+
+/* Transfer information bits; macro arguments are parenthesised so expressions are safe */
+#define BCM2838_DMA40_INTEN BIT(0)
+#define BCM2838_DMA40_TDMODE BIT(1) /* 2D-Mode */
+#define BCM2838_DMA40_WAIT_RESP BIT(2) /* wait for AXI write to be acked */
+#define BCM2838_DMA40_WAIT_RD_RESP BIT(3) /* wait for AXI read to complete */
+#define BCM2838_DMA40_PER_MAP(x) (((x) & 31) << 9) /* REQ source */
+#define BCM2838_DMA40_S_DREQ BIT(14) /* enable DREQ for source */
+#define BCM2838_DMA40_D_DREQ BIT(15) /* enable DREQ for destination */
+#define BCM2838_DMA40_S_WAIT(x) (((x) & 0xff) << 16) /* add DMA read-wait cycles */
+#define BCM2838_DMA40_D_WAIT(x) (((x) & 0xff) << 24) /* add DMA write-wait cycles */
-#define BCM2838_DMA40_CS_QOS(x) (((x) & 0x1f) << 16)
-#define BCM2838_DMA40_CS_PANIC_QOS(x) (((x) & 0x1f) << 20)
-#define BCM2838_DMA40_CS_WRITE_WAIT BIT(28)
+/* debug register bits */
+#define BCM2838_DMA40_DEBUG_WRITE_ERR BIT(0)
+#define BCM2838_DMA40_DEBUG_FIFO_ERR BIT(1)
+#define BCM2838_DMA40_DEBUG_READ_ERR BIT(2)
+#define BCM2838_DMA40_DEBUG_READ_CB_ERR BIT(3)
+#define BCM2838_DMA40_DEBUG_IN_ON_ERR BIT(8)
+#define BCM2838_DMA40_DEBUG_ABORT_ON_ERR BIT(9)
+#define BCM2838_DMA40_DEBUG_HALT_ON_ERR BIT(10)
+#define BCM2838_DMA40_DEBUG_DISABLE_CLK_GATE BIT(11)
+#define BCM2838_DMA40_DEBUG_RSTATE_SHIFT 14
+#define BCM2838_DMA40_DEBUG_RSTATE_BITS 4
+#define BCM2838_DMA40_DEBUG_WSTATE_SHIFT 18
+#define BCM2838_DMA40_DEBUG_WSTATE_BITS 4
+#define BCM2838_DMA40_DEBUG_RESET BIT(23)
+#define BCM2838_DMA40_DEBUG_ID_SHIFT 24
+#define BCM2838_DMA40_DEBUG_ID_BITS 4
+#define BCM2838_DMA40_DEBUG_VERSION_SHIFT 28
+#define BCM2838_DMA40_DEBUG_VERSION_BITS 4
+
+/* Valid only for channels 0 - 3 (11 - 14) */
+#define BCM2838_DMA40_CHAN(n) (((n) + 11) << 8) /* Base address */
+#define BCM2838_DMA40_CHANIO(base, n) ((base) + BCM2838_DMA40_CHAN(n))
-#define BCM2838_DMA40_BURST_LEN(x) ((((x) - 1) & 0xf) << 8)
-#define BCM2838_DMA40_INC BIT(12)
-#define BCM2838_DMA40_SIZE_128 (2 << 13)
+/* the max dma length for different channels */
+#define MAX_DMA40_LEN SZ_1G
-#define BCM2838_DMA40_MEMCPY_QOS \
- (BCM2838_DMA40_CS_QOS(0x0) | \
- BCM2838_DMA40_CS_PANIC_QOS(0x0) | \
- BCM2838_DMA40_CS_WRITE_WAIT)
+#define BCM2838_DMA40_BURST_LEN(x) ((min(x,16) - 1) << 8)
+#define BCM2838_DMA40_INC BIT(12)
+#define BCM2838_DMA40_SIZE_32 (0 << 13)
+#define BCM2838_DMA40_SIZE_64 (1 << 13)
+#define BCM2838_DMA40_SIZE_128 (2 << 13)
+#define BCM2838_DMA40_SIZE_256 (3 << 13)
+#define BCM2838_DMA40_IGNORE BIT(15)
+#define BCM2838_DMA40_STRIDE(x) ((x) << 16) /* For 2D mode */
+
+#define BCM2838_DMA40_MEMCPY_FLAGS \
+ (BCM2838_DMA40_QOS(0) | \
+ BCM2838_DMA40_PANIC_QOS(0) | \
+ BCM2838_DMA40_WAIT_FOR_WRITES | \
+ BCM2838_DMA40_DISDEBUG)
#define BCM2838_DMA40_MEMCPY_XFER_INFO \
(BCM2838_DMA40_SIZE_128 | \
BCM2838_DMA40_INC | \
BCM2838_DMA40_BURST_LEN(16))
+struct bcm2835_dmadev *memcpy_parent;
static void __iomem *memcpy_chan;
static struct bcm2838_dma40_scb *memcpy_scb;
static dma_addr_t memcpy_scb_dma;
DEFINE_SPINLOCK(memcpy_lock);
+static const struct bcm2835_dma_cfg_data bcm2835_dma_cfg = {
+ .chan_40bit_mask = 0,
+};
+
+static const struct bcm2835_dma_cfg_data bcm2838_dma_cfg = {
+ .chan_40bit_mask = BIT(11) | BIT(12) | BIT(13) | BIT(14),
+};
+
static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
{
/* lite and normal channels have different max frame length */
@@ -259,6 +332,32 @@ static inline struct bcm2835_desc *to_bc
return container_of(t, struct bcm2835_desc, vd.tx);
}
+static inline uint32_t to_bcm2838_ti(uint32_t info)
+{
+ return ((info & BCM2835_DMA_INT_EN) ? BCM2838_DMA40_INTEN : 0) |
+ ((info & BCM2835_DMA_WAIT_RESP) ? BCM2838_DMA40_WAIT_RESP : 0) |
+ ((info & BCM2835_DMA_S_DREQ) ?
+ (BCM2838_DMA40_S_DREQ | BCM2838_DMA40_WAIT_RD_RESP) : 0) |
+ ((info & BCM2835_DMA_D_DREQ) ? BCM2838_DMA40_D_DREQ : 0) |
+ BCM2838_DMA40_PER_MAP((info >> 16) & 0x1f);
+}
+
+static inline uint32_t to_bcm2838_srci(uint32_t info)
+{
+ return ((info & BCM2835_DMA_S_INC) ? BCM2838_DMA40_INC : 0);
+}
+
+static inline uint32_t to_bcm2838_dsti(uint32_t info)
+{
+ return ((info & BCM2835_DMA_D_INC) ? BCM2838_DMA40_INC : 0);
+}
+
+static inline uint32_t to_bcm2838_cbaddr(dma_addr_t addr)
+{
+ BUG_ON(addr & 0x1f); /* low 5 bits must be clear (32-byte aligned) */
+ return (addr >> 5); /* address expressed in 32-byte units */
+}
+
static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc)
{
size_t i;
@@ -277,45 +376,53 @@ static void bcm2835_dma_desc_free(struct
}
static void bcm2835_dma_create_cb_set_length(
- struct bcm2835_chan *chan,
+ struct bcm2835_chan *c,
struct bcm2835_dma_cb *control_block,
size_t len,
size_t period_len,
size_t *total_len,
u32 finalextrainfo)
{
- size_t max_len = bcm2835_dma_max_frame_length(chan);
+ size_t max_len = bcm2835_dma_max_frame_length(c);
+ uint32_t cb_len;
/* set the length taking lite-channel limitations into account */
- control_block->length = min_t(u32, len, max_len);
+ cb_len = min_t(u32, len, max_len);
- /* finished if we have no period_length */
- if (!period_len)
- return;
+ if (period_len) {
+ /*
+ * period_len means: that we need to generate
+ * transfers that are terminating at every
+ * multiple of period_len - this is typically
+ * used to set the interrupt flag in info
+ * which is required during cyclic transfers
+ */
- /*
- * period_len means: that we need to generate
- * transfers that are terminating at every
- * multiple of period_len - this is typically
- * used to set the interrupt flag in info
- * which is required during cyclic transfers
- */
+ /* have we filled in period_length yet? */
+ if (*total_len + cb_len < period_len) {
+ /* update number of bytes in this period so far */
+ *total_len += cb_len;
+ } else {
+ /* calculate the length that remains to reach period_len */
+ cb_len = period_len - *total_len;
- /* have we filled in period_length yet? */
- if (*total_len + control_block->length < period_len) {
- /* update number of bytes in this period so far */
- *total_len += control_block->length;
- return;
+ /* reset total_length for next period */
+ *total_len = 0;
+ }
}
- /* calculate the length that remains to reach period_length */
- control_block->length = period_len - *total_len;
-
- /* reset total_length for next period */
- *total_len = 0;
-
- /* add extrainfo bits in info */
- control_block->info |= finalextrainfo;
+ if (c->is_40bit_channel) {
+ struct bcm2838_dma40_scb *scb =
+ (struct bcm2838_dma40_scb *)control_block;
+
+ scb->len = cb_len;
+ /* add extrainfo bits to ti */
+ scb->ti |= to_bcm2838_ti(finalextrainfo);
+ } else {
+ control_block->length = cb_len;
+ /* add extrainfo bits to info */
+ control_block->info |= finalextrainfo;
+ }
}
static inline size_t bcm2835_dma_count_frames_for_sg(
@@ -338,7 +445,7 @@ static inline size_t bcm2835_dma_count_f
/**
* bcm2835_dma_create_cb_chain - create a control block and fills data in
*
- * @chan: the @dma_chan for which we run this
+ * @c: the @bcm2835_chan for which we run this
* @direction: the direction in which we transfer
* @cyclic: it is a cyclic transfer
* @info: the default info bits to apply per controlblock
@@ -356,12 +463,11 @@ static inline size_t bcm2835_dma_count_f
* @gfp: the GFP flag to use for allocation
*/
static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
- struct dma_chan *chan, enum dma_transfer_direction direction,
+ struct bcm2835_chan *c, enum dma_transfer_direction direction,
bool cyclic, u32 info, u32 finalextrainfo, size_t frames,
dma_addr_t src, dma_addr_t dst, size_t buf_len,
size_t period_len, gfp_t gfp)
{
- struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
size_t len = buf_len, total_len;
size_t frame;
struct bcm2835_desc *d;
@@ -393,11 +499,23 @@ static struct bcm2835_desc *bcm2835_dma_
/* fill in the control block */
control_block = cb_entry->cb;
- control_block->info = info;
- control_block->src = src;
- control_block->dst = dst;
- control_block->stride = 0;
- control_block->next = 0;
+ if (c->is_40bit_channel) {
+ struct bcm2838_dma40_scb *scb =
+ (struct bcm2838_dma40_scb *)control_block;
+ scb->ti = to_bcm2838_ti(info);
+ scb->src = lower_32_bits(src);
+ scb->srci= upper_32_bits(src) | to_bcm2838_srci(info);
+ scb->dst = lower_32_bits(dst);
+ scb->dsti = upper_32_bits(dst) | to_bcm2838_dsti(info);
+ scb->next_cb = 0;
+ } else {
+ control_block->info = info;
+ control_block->src = src;
+ control_block->dst = dst;
+ control_block->stride = 0;
+ control_block->next = 0;
+ }
+
/* set up length in control_block if requested */
if (buf_len) {
/* calculate length honoring period_length */
@@ -411,7 +529,10 @@ static struct bcm2835_desc *bcm2835_dma_
}
/* link this the last controlblock */
- if (frame)
+ if (frame && c->is_40bit_channel)
+ d->cb_list[frame - 1].cb->next =
+ to_bcm2838_cbaddr(cb_entry->paddr);
+ if (frame && !c->is_40bit_channel)
d->cb_list[frame - 1].cb->next = cb_entry->paddr;
/* update src and dst and length */
@@ -425,7 +546,14 @@ static struct bcm2835_desc *bcm2835_dma_
}
/* the last frame requires extra flags */
- d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
+ if (c->is_40bit_channel) {
+ struct bcm2838_dma40_scb *scb =
+ (struct bcm2838_dma40_scb *)d->cb_list[d->frames-1].cb;
+
+ scb->ti |= to_bcm2838_ti(finalextrainfo);
+ } else {
+ d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
+ }
/* detect a size missmatch */
if (buf_len && (d->size != buf_len))
@@ -439,13 +567,12 @@ error_cb:
}
static void bcm2835_dma_fill_cb_chain_with_sg(
- struct dma_chan *chan,
+ struct bcm2835_chan *c,
enum dma_transfer_direction direction,
struct bcm2835_cb_entry *cb,
struct scatterlist *sgl,
unsigned int sg_len)
{
- struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
size_t len, max_len;
unsigned int i;
dma_addr_t addr;
@@ -453,14 +580,34 @@ static void bcm2835_dma_fill_cb_chain_wi
max_len = bcm2835_dma_max_frame_length(c);
for_each_sg(sgl, sgent, sg_len, i) {
- for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent);
- len > 0;
- addr += cb->cb->length, len -= cb->cb->length, cb++) {
- if (direction == DMA_DEV_TO_MEM)
- cb->cb->dst = addr;
- else
- cb->cb->src = addr;
- cb->cb->length = min(len, max_len);
+ if (c->is_40bit_channel) {
+ struct bcm2838_dma40_scb *scb =
+ (struct bcm2838_dma40_scb *)cb->cb;
+ for (addr = sg_dma_address(sgent),
+ len = sg_dma_len(sgent);
+ len > 0;
+ addr += scb->len, len -= scb->len, scb++) {
+ if (direction == DMA_DEV_TO_MEM) {
+ scb->dst = lower_32_bits(addr);
+ scb->dsti = upper_32_bits(addr) | BCM2838_DMA40_INC;
+ } else {
+ scb->src = lower_32_bits(addr);
+ scb->srci = upper_32_bits(addr) | BCM2838_DMA40_INC;
+ }
+ scb->len = min(len, max_len);
+ }
+ } else {
+ for (addr = sg_dma_address(sgent),
+ len = sg_dma_len(sgent);
+ len > 0;
+ addr += cb->cb->length, len -= cb->cb->length,
+ cb++) {
+ if (direction == DMA_DEV_TO_MEM)
+ cb->cb->dst = addr;
+ else
+ cb->cb->src = addr;
+ cb->cb->length = min(len, max_len);
+ }
}
}
}
@@ -469,6 +616,10 @@ static void bcm2835_dma_abort(struct bcm
{
void __iomem *chan_base = c->chan_base;
long int timeout = 10000;
+ u32 wait_mask = BCM2835_DMA_WAITING_FOR_WRITES;
+
+ if (c->is_40bit_channel)
+ wait_mask = BCM2838_DMA40_WAITING_FOR_WRITES;
/*
* A zero control block address means the channel is idle.
@@ -481,8 +632,7 @@ static void bcm2835_dma_abort(struct bcm
writel(0, chan_base + BCM2835_DMA_CS);
/* Wait for any current AXI transfer to complete */
- while ((readl(chan_base + BCM2835_DMA_CS) &
- BCM2835_DMA_WAITING_FOR_WRITES) && --timeout)
+ while ((readl(chan_base + BCM2835_DMA_CS) & wait_mask) && --timeout)
cpu_relax();
/* Peripheral might be stuck and fail to signal AXI write responses */
@@ -507,9 +657,16 @@ static void bcm2835_dma_start_desc(struc
c->desc = d = to_bcm2835_dma_desc(&vd->tx);
- writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
- writel(BCM2835_DMA_ACTIVE | BCM2835_DMA_CS_FLAGS(c->dreq),
- c->chan_base + BCM2835_DMA_CS);
+ if (c->is_40bit_channel) {
+ writel(to_bcm2838_cbaddr(d->cb_list[0].paddr),
+ c->chan_base + BCM2838_DMA40_CB);
+ writel(BCM2838_DMA40_ACTIVE | BCM2838_DMA40_CS_FLAGS(c->dreq),
+ c->chan_base + BCM2838_DMA40_CS);
+ } else {
+ writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
+ writel(BCM2835_DMA_ACTIVE | BCM2835_DMA_CS_FLAGS(c->dreq),
+ c->chan_base + BCM2835_DMA_CS);
+ }
}
static irqreturn_t bcm2835_dma_callback(int irq, void *data)
@@ -537,7 +694,8 @@ static irqreturn_t bcm2835_dma_callback(
* will remain idle despite the ACTIVE flag being set.
*/
writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE |
- BCM2835_DMA_CS_FLAGS(c->dreq),
+ (c->is_40bit_channel ? BCM2838_DMA40_CS_FLAGS(c->dreq) :
+ BCM2835_DMA_CS_FLAGS(c->dreq)),
c->chan_base + BCM2835_DMA_CS);
d = c->desc;
@@ -640,9 +798,17 @@ static enum dma_status bcm2835_dma_tx_st
struct bcm2835_desc *d = c->desc;
dma_addr_t pos;
- if (d->dir == DMA_MEM_TO_DEV)
+ if (d->dir == DMA_MEM_TO_DEV && c->is_40bit_channel)
+ pos = readl(c->chan_base + BCM2838_DMA40_SRC) +
+ ((readl(c->chan_base + BCM2838_DMA40_SRCI) &
+ 0xff) << 8);
+ else if (d->dir == DMA_MEM_TO_DEV && !c->is_40bit_channel)
pos = readl(c->chan_base + BCM2835_DMA_SOURCE_AD);
- else if (d->dir == DMA_DEV_TO_MEM)
+ else if (d->dir == DMA_DEV_TO_MEM && c->is_40bit_channel)
+ pos = readl(c->chan_base + BCM2838_DMA40_DEST) +
+ ((readl(c->chan_base + BCM2838_DMA40_DESTI) &
+ 0xff) << 8);
+ else if (d->dir == DMA_DEV_TO_MEM && !c->is_40bit_channel)
pos = readl(c->chan_base + BCM2835_DMA_DEST_AD);
else
pos = 0;
@@ -688,7 +854,7 @@ static struct dma_async_tx_descriptor *b
frames = bcm2835_dma_frames_for_length(len, max_len);
/* allocate the CB chain - this also fills in the pointers */
- d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false,
+ d = bcm2835_dma_create_cb_chain(c, DMA_MEM_TO_MEM, false,
info, extra, frames,
src, dst, len, 0, GFP_KERNEL);
if (!d)
@@ -723,11 +889,21 @@ static struct dma_async_tx_descriptor *b
if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
return NULL;
src = c->cfg.src_addr;
+ /*
+ * One would think it ought to be possible to get the physical
+ * to dma address mapping information from the dma-ranges DT
+ * property, but I've not found a way yet that doesn't involve
+ * open-coding the whole thing.
+ */
+ if (c->is_40bit_channel)
+ src |= 0x400000000ull;
info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
} else {
if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
return NULL;
dst = c->cfg.dst_addr;
+ if (c->is_40bit_channel)
+ dst |= 0x400000000ull;
info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
}
@@ -735,7 +911,7 @@ static struct dma_async_tx_descriptor *b
frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len);
/* allocate the CB chain */
- d = bcm2835_dma_create_cb_chain(chan, direction, false,
+ d = bcm2835_dma_create_cb_chain(c, direction, false,
info, extra,
frames, src, dst, 0, 0,
GFP_NOWAIT);
@@ -743,7 +919,7 @@ static struct dma_async_tx_descriptor *b
return NULL;
/* fill in frames with scatterlist pointers */
- bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list,
+ bcm2835_dma_fill_cb_chain_with_sg(c, direction, d->cb_list,
sgl, sg_len);
return vchan_tx_prep(&c->vc, &d->vd, flags);
@@ -822,7 +998,7 @@ static struct dma_async_tx_descriptor *b
* note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine
* implementation calls prep_dma_cyclic with interrupts disabled.
*/
- d = bcm2835_dma_create_cb_chain(chan, direction, true,
+ d = bcm2835_dma_create_cb_chain(c, direction, true,
info, extra,
frames, src, dst, buf_len,
period_len, GFP_NOWAIT);
@@ -830,7 +1006,8 @@ static struct dma_async_tx_descriptor *b
return NULL;
/* wrap around into a loop */
- d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
+ d->cb_list[d->frames - 1].cb->next = c->is_40bit_channel ?
+ to_bcm2838_cbaddr(d->cb_list[0].paddr) : d->cb_list[0].paddr;
return vchan_tx_prep(&c->vc, &d->vd, flags);
}
@@ -894,9 +1071,11 @@ static int bcm2835_dma_chan_init(struct
c->irq_number = irq;
c->irq_flags = irq_flags;
- /* check in DEBUG register if this is a LITE channel */
- if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
- BCM2835_DMA_DEBUG_LITE)
+ /* check for 40bit and lite channels */
+ if (d->cfg_data->chan_40bit_mask & BIT(chan_id))
+ c->is_40bit_channel = true;
+ else if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
+ BCM2835_DMA_DEBUG_LITE)
c->is_lite_channel = true;
return 0;
@@ -916,18 +1095,16 @@ static void bcm2835_dma_free(struct bcm2
DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
}
-int bcm2838_dma40_memcpy_init(struct device *dev)
+int bcm2838_dma40_memcpy_init(void)
{
- if (memcpy_scb)
- return 0;
+ if (!memcpy_parent)
+ return -EPROBE_DEFER;
- memcpy_scb = dma_alloc_coherent(dev, sizeof(*memcpy_scb),
- &memcpy_scb_dma, GFP_KERNEL);
+ if (!memcpy_chan)
+ return -EINVAL;
- if (!memcpy_scb) {
- pr_err("bcm2838_dma40_memcpy_init failed!\n");
+ if (!memcpy_scb)
return -ENOMEM;
- }
return 0;
}
@@ -954,20 +1131,22 @@ void bcm2838_dma40_memcpy(dma_addr_t dst
scb->next_cb = 0;
writel((u32)(memcpy_scb_dma >> 5), memcpy_chan + BCM2838_DMA40_CB);
- writel(BCM2838_DMA40_MEMCPY_QOS + BCM2838_DMA40_CS_ACTIVE,
+ writel(BCM2838_DMA40_MEMCPY_FLAGS + BCM2838_DMA40_ACTIVE,
memcpy_chan + BCM2838_DMA40_CS);
+
/* Poll for completion */
- while (!(readl(memcpy_chan + BCM2838_DMA40_CS) & BCM2838_DMA40_CS_END))
+ while (!(readl(memcpy_chan + BCM2838_DMA40_CS) & BCM2838_DMA40_END))
cpu_relax();
- writel(BCM2838_DMA40_CS_END, memcpy_chan + BCM2838_DMA40_CS);
+ writel(BCM2838_DMA40_END, memcpy_chan + BCM2838_DMA40_CS);
spin_unlock_irqrestore(&memcpy_lock, flags);
}
EXPORT_SYMBOL(bcm2838_dma40_memcpy);
static const struct of_device_id bcm2835_dma_of_match[] = {
- { .compatible = "brcm,bcm2835-dma", },
+ { .compatible = "brcm,bcm2835-dma", .data = &bcm2835_dma_cfg },
+ { .compatible = "brcm,bcm2838-dma", .data = &bcm2838_dma_cfg },
{},
};
MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
@@ -999,6 +1178,8 @@ static int bcm2835_dma_probe(struct plat
int irq_flags;
uint32_t chans_available;
char chan_name[BCM2835_DMA_CHAN_NAME_SIZE];
+ const struct of_device_id *of_id;
+ int chan_count, chan_start, chan_end;
if (!pdev->dev.dma_mask)
pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
@@ -1020,9 +1201,13 @@ static int bcm2835_dma_probe(struct plat
base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(base))
return PTR_ERR(base);
- rc = bcm_dmaman_probe(pdev, base, BCM2835_DMA_BULK_MASK);
- if (rc)
- dev_err(&pdev->dev, "Failed to initialize the legacy API\n");
+
+ /* The set of channels can be split across multiple instances. */
+ chan_start = ((u32)(uintptr_t)base / BCM2835_DMA_CHAN_SIZE) & 0xf;
+ base -= BCM2835_DMA_CHAN(chan_start);
+ chan_count = resource_size(res) / BCM2835_DMA_CHAN_SIZE;
+ chan_end = min(chan_start + chan_count,
+ BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1);
od->base = base;
@@ -1059,6 +1244,14 @@ static int bcm2835_dma_probe(struct plat
return -ENOMEM;
}
+ of_id = of_match_node(bcm2835_dma_of_match, pdev->dev.of_node);
+ if (!of_id) {
+ dev_err(&pdev->dev, "Failed to match compatible string\n");
+ return -EINVAL;
+ }
+
+ od->cfg_data = of_id->data;
+
/* Request DMA channel mask from device tree */
if (of_property_read_u32(pdev->dev.of_node,
"brcm,dma-channel-mask",
@@ -1068,18 +1261,34 @@ static int bcm2835_dma_probe(struct plat
goto err_no_dma;
}
- /* Channel 0 is used by the legacy API */
- chans_available &= ~BCM2835_DMA_BULK_MASK;
+ /* One channel is reserved for the legacy API */
+ if (chans_available & BCM2835_DMA_BULK_MASK) {
+ rc = bcm_dmaman_probe(pdev, base,
+ chans_available & BCM2835_DMA_BULK_MASK);
+ if (rc)
+ dev_err(&pdev->dev,
+ "Failed to initialize the legacy API\n");
+
+ chans_available &= ~BCM2835_DMA_BULK_MASK;
+ }
- /* We can't use channels 11-13 yet */
- chans_available &= ~(BIT(11) | BIT(12) | BIT(13));
+ /* And possibly one for the 40-bit DMA memcpy API */
+ if (chans_available & od->cfg_data->chan_40bit_mask &
+ BIT(BCM2838_DMA_MEMCPY_CHAN)) {
+ memcpy_parent = od;
+ memcpy_chan = BCM2835_DMA_CHANIO(base, BCM2838_DMA_MEMCPY_CHAN);
+ memcpy_scb = dma_alloc_coherent(memcpy_parent->ddev.dev,
+ sizeof(*memcpy_scb),
+ &memcpy_scb_dma, GFP_KERNEL);
+ if (!memcpy_scb)
+ dev_warn(&pdev->dev,
+ "Failed to allocated memcpy scb\n");
- /* Grab channel 14 for the 40-bit DMA memcpy */
- chans_available &= ~BIT(14);
- memcpy_chan = BCM2835_DMA_CHANIO(base, 14);
+ chans_available &= ~BIT(BCM2838_DMA_MEMCPY_CHAN);
+ }
/* get irqs for each channel that we support */
- for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+ for (i = chan_start; i < chan_end; i++) {
/* skip masked out channels */
if (!(chans_available & (1 << i))) {
irq[i] = -1;
@@ -1102,13 +1311,17 @@ static int bcm2835_dma_probe(struct plat
irq[i] = platform_get_irq(pdev, i < 11 ? i : 11);
}
+ chan_count = 0;
+
/* get irqs for each channel */
- for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+ for (i = chan_start; i < chan_end; i++) {
/* skip channels without irq */
if (irq[i] < 0)
continue;
/* check if there are other channels that also use this irq */
+ /* FIXME: This will fail if interrupts are shared across
+ instances */
irq_flags = 0;
for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++)
if ((i != j) && (irq[j] == irq[i])) {
@@ -1120,9 +1333,10 @@ static int bcm2835_dma_probe(struct plat
rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags);
if (rc)
goto err_no_dma;
+ chan_count++;
}
- dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i);
+ dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", chan_count);
/* Device-tree DMA controller registration */
rc = of_dma_controller_register(pdev->dev.of_node,
@@ -1154,6 +1368,13 @@ static int bcm2835_dma_remove(struct pla
bcm_dmaman_remove(pdev);
dma_async_device_unregister(&od->ddev);
+ if (memcpy_parent == od) {
+ dma_free_coherent(&pdev->dev, sizeof(*memcpy_scb), memcpy_scb,
+ memcpy_scb_dma);
+ memcpy_parent = NULL;
+ memcpy_scb = NULL;
+ memcpy_chan = NULL;
+ }
bcm2835_dma_free(od);
return 0;
--- a/drivers/pci/controller/pcie-brcmstb-bounce.c
+++ b/drivers/pci/controller/pcie-brcmstb-bounce.c
@@ -91,7 +91,7 @@ struct dmabounce_device_info {
static struct dmabounce_device_info *g_dmabounce_device_info;
-extern int bcm2838_dma40_memcpy_init(struct device *dev);
+extern int bcm2838_dma40_memcpy_init(void);
extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
#ifdef STATS
@@ -465,9 +465,9 @@ static const struct dma_map_ops dmabounc
.dma_supported = dmabounce_dma_supported,
};
-int brcm_pcie_bounce_register_dev(struct device *dev,
- unsigned long buffer_size,
- dma_addr_t threshold)
+int brcm_pcie_bounce_init(struct device *dev,
+ unsigned long buffer_size,
+ dma_addr_t threshold)
{
struct dmabounce_device_info *device_info;
int ret;
@@ -476,9 +476,9 @@ int brcm_pcie_bounce_register_dev(struct
if (g_dmabounce_device_info)
return -EBUSY;
- ret = bcm2838_dma40_memcpy_init(dev);
+ ret = bcm2838_dma40_memcpy_init();
if (ret)
- return ret;
+ return ret;
device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
if (!device_info) {
@@ -509,9 +509,8 @@ int brcm_pcie_bounce_register_dev(struct
device_create_file(dev, &dev_attr_dmabounce_stats));
g_dmabounce_device_info = device_info;
- set_dma_ops(dev, &dmabounce_ops);
- dev_info(dev, "dmabounce: registered device - %ld kB, threshold %pad\n",
+ dev_info(dev, "dmabounce: initialised - %ld kB, threshold %pad\n",
buffer_size / 1024, &threshold);
return 0;
@@ -520,14 +519,13 @@ int brcm_pcie_bounce_register_dev(struct
kfree(device_info);
return ret;
}
-EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
+EXPORT_SYMBOL(brcm_pcie_bounce_init);
-void brcm_pcie_bounce_unregister_dev(struct device *dev)
+void brcm_pcie_bounce_uninit(struct device *dev)
{
struct dmabounce_device_info *device_info = g_dmabounce_device_info;
g_dmabounce_device_info = NULL;
- set_dma_ops(dev, NULL);
if (!device_info) {
dev_warn(dev,
@@ -548,10 +546,16 @@ void brcm_pcie_bounce_unregister_dev(str
device_remove_file(dev, &dev_attr_dmabounce_stats));
kfree(device_info);
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
+
+int brcm_pcie_bounce_register_dev(struct device *dev)
+{
+ set_dma_ops(dev, &dmabounce_ops);
- dev_info(dev, "dmabounce: device unregistered\n");
+ return 0;
}
-EXPORT_SYMBOL(brcm_pcie_bounce_unregister_dev);
+EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
--- a/drivers/pci/controller/pcie-brcmstb-bounce.h
+++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
@@ -8,21 +8,26 @@
#ifdef CONFIG_ARM
-int brcm_pcie_bounce_register_dev(struct device *dev, unsigned long buffer_size,
- dma_addr_t threshold);
-
-int brcm_pcie_bounce_unregister_dev(struct device *dev);
+int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
+ dma_addr_t threshold);
+int brcm_pcie_bounce_uninit(struct device *dev);
+int brcm_pcie_bounce_register_dev(struct device *dev);
#else
-static inline int brcm_pcie_bounce_register_dev(struct device *dev,
- unsigned long buffer_size,
- dma_addr_t threshold)
+static inline int brcm_pcie_bounce_init(struct device *dev,
+ unsigned long buffer_size,
+ dma_addr_t threshold)
+{
+ return 0;
+}
+
+static inline int brcm_pcie_bounce_uninit(struct device *dev)
{
return 0;
}
-static inline int brcm_pcie_bounce_unregister_dev(struct device *dev)
+static inline int brcm_pcie_bounce_register_dev(struct device *dev)
{
return 0;
}
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -644,6 +644,7 @@ static void brcm_set_dma_ops(struct devi
static inline void brcm_pcie_perst_set(struct brcm_pcie *pcie,
unsigned int val);
+
static int brcmstb_platform_notifier(struct notifier_block *nb,
unsigned long event, void *__dev)
{
@@ -657,12 +658,11 @@ static int brcmstb_platform_notifier(str
strcmp(dev->kobj.name, rc_name)) {
int ret;
- ret = brcm_pcie_bounce_register_dev(dev, bounce_buffer,
- (dma_addr_t)bounce_threshold);
+ ret = brcm_pcie_bounce_register_dev(dev);
if (ret) {
dev_err(dev,
"brcm_pcie_bounce_register_dev() failed: %d\n",
- ret);
+ ret);
return ret;
}
}
@@ -675,8 +675,6 @@ static int brcmstb_platform_notifier(str
brcm_pcie_perst_set(g_pcie, 1);
msleep(100);
brcm_pcie_perst_set(g_pcie, 0);
- } else if (max_pfn > (bounce_threshold/PAGE_SIZE)) {
- brcm_pcie_bounce_unregister_dev(dev);
}
return NOTIFY_OK;
@@ -1712,6 +1710,7 @@ static int brcm_pcie_probe(struct platfo
void __iomem *base;
struct pci_host_bridge *bridge;
struct pci_bus *child;
+ extern unsigned long max_pfn;
bridge = devm_pci_alloc_host_bridge(&pdev->dev, sizeof(*pcie));
if (!bridge)
@@ -1747,6 +1746,20 @@ static int brcm_pcie_probe(struct platfo
if (IS_ERR(base))
return PTR_ERR(base);
+ /* To Do: Add hardware check if this ever gets fixed */
+ if (max_pfn > (bounce_threshold/PAGE_SIZE)) {
+ int ret;
+ ret = brcm_pcie_bounce_init(&pdev->dev, bounce_buffer,
+ (dma_addr_t)bounce_threshold);
+ if (ret) {
+ if (ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "could not init bounce buffers: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
pcie->clk = of_clk_get_by_name(dn, "sw_pcie");
if (IS_ERR(pcie->clk)) {
dev_warn(&pdev->dev, "could not get clock\n");

@ -1,27 +0,0 @@
From 510a127017a0aada2734dbf57d25aaa0189198ff Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Wed, 7 Aug 2019 17:19:33 +0100
Subject: [PATCH] pcie-brcmstb: Don't set DMA ops for root complex
A change to arm_get_dma_map_ops has stopped get_dma_ops from working
on the root complex, causing an error to be logged. However, there is
no need to override the DMA ops in that case, so skip it and
eliminate the error message.
Signed-off-by: Phil Elwell <phil@raspberrypi.org>
---
drivers/pci/controller/pcie-brcmstb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -665,8 +665,8 @@ static int brcmstb_platform_notifier(str
ret);
return ret;
}
+ brcm_set_dma_ops(dev);
}
- brcm_set_dma_ops(dev);
return NOTIFY_OK;
case BUS_NOTIFY_DEL_DEVICE:

@ -1,713 +0,0 @@
From 60f3db31d4cb785befed715b80c430f60f647701 Mon Sep 17 00:00:00 2001
From: yaroslavros <yaroslavros@gmail.com>
Date: Wed, 14 Aug 2019 15:22:55 +0100
Subject: [PATCH] Ported pcie-brcmstb bounce buffer implementation to
ARM64. (#3144)
Ported pcie-brcmstb bounce buffer implementation to ARM64.
This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.
Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
---
arch/arm64/mm/dma-mapping.c | 29 +
drivers/pci/controller/Makefile | 3 +
drivers/pci/controller/pcie-brcmstb-bounce.h | 2 +-
.../pci/controller/pcie-brcmstb-bounce64.c | 569 ++++++++++++++++++
drivers/pci/controller/pcie-brcmstb.c | 32 +-
5 files changed, 610 insertions(+), 25 deletions(-)
create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -31,6 +31,35 @@ void arch_dma_prep_coherent(struct page
}
#ifdef CONFIG_IOMMU_DMA
+/*
+ * Build a one-entry scatterlist table describing @page. The entry length is
+ * @size rounded up to a whole number of pages. Returns 0 on success or the
+ * error reported by sg_alloc_table().
+ */
+static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+				      struct page *page, size_t size)
+{
+	int err;
+
+	err = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	if (err)
+		return err;
+
+	sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+	return 0;
+}
+
+/*
+ * Map the region that starts at @pfn and covers @size bytes into @vma,
+ * honouring the page offset recorded in the VMA. Returns -ENXIO when the
+ * requested window does not fit inside the region, otherwise the result of
+ * remap_pfn_range().
+ */
+static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
+			      unsigned long pfn, size_t size)
+{
+	unsigned long wanted_pages = vma_pages(vma);
+	unsigned long region_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long first_page = vma->vm_pgoff;
+
+	/* the offset must lie inside the region and the VMA must fit after it */
+	if (first_page >= region_pages ||
+	    wanted_pages > region_pages - first_page)
+		return -ENXIO;
+
+	return remap_pfn_range(vma, vma->vm_start, pfn + first_page,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
void arch_teardown_dma_ops(struct device *dev)
{
dev->dma_ops = NULL;
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -33,6 +33,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcms
ifdef CONFIG_ARM
obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
endif
+ifdef CONFIG_ARM64
+obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
+endif
obj-$(CONFIG_VMD) += vmd.o
# pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
--- a/drivers/pci/controller/pcie-brcmstb-bounce.h
+++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
@@ -6,7 +6,7 @@
#ifndef _PCIE_BRCMSTB_BOUNCE_H
#define _PCIE_BRCMSTB_BOUNCE_H
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
dma_addr_t threshold);
--- /dev/null
+++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
@@ -0,0 +1,569 @@
+/*
+ * This code started out as a version of arch/arm/common/dmabounce.c,
+ * modified to cope with highmem pages. Now it has been changed heavily -
+ * it now preallocates a large block (currently 4MB) and carves it up
+ * sequentially in ring fashion, and DMA is used to copy the data - to the
+ * point where very little of the original remains.
+ *
+ * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
+ *
+ * Original version by Brad Parker (brad@heeltoe.com)
+ * Re-written by Christopher Hoover <ch@murgatroid.com>
+ * Made generic by Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright (C) 2002 Hewlett Packard Company.
+ * Copyright (C) 2004 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/page-flags.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/dmapool.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/bitmap.h>
+#include <linux/swiotlb.h>
+
+#include <asm/cacheflush.h>
+
+#define STATS
+
+#ifdef STATS
+#define DO_STATS(X) do { X ; } while (0)
+#else
+#define DO_STATS(X) do { } while (0)
+#endif
+
+/* ************************************************** */
+
+/* One active bounce mapping, linked on dmabounce_device_info.safe_buffers. */
+struct safe_buffer {
+ struct list_head node; /* entry in the device's safe_buffers list */
+
+ /* original request */
+ size_t size;
+ int direction;
+
+ struct dmabounce_pool *pool; /* pool the bounce region was taken from */
+ void *safe; /* CPU address of the bounce region */
+ dma_addr_t unsafe_dma_addr; /* DMA address of the caller's buffer */
+ dma_addr_t safe_dma_addr; /* DMA address of the bounce region */
+};
+
+/* Page-granular allocator over one coherent DMA buffer. */
+struct dmabounce_pool {
+ unsigned long pages; /* total pages in the pool */
+ void *virt_addr; /* CPU address of the coherent buffer */
+ dma_addr_t dma_addr; /* DMA address of the coherent buffer */
+ unsigned long *alloc_map; /* bitmap, one bit per pool page */
+ unsigned long alloc_pos; /* page index where the next search starts */
+ spinlock_t lock; /* taken around bitmap and counter updates */
+ struct device *dev; /* device the coherent buffer was allocated for */
+ unsigned long num_pages; /* pages currently allocated */
+#ifdef STATS
+ size_t max_size; /* largest single request seen, in bytes */
+ unsigned long num_bufs; /* buffers currently allocated */
+ unsigned long max_bufs; /* high-water mark of num_bufs */
+ unsigned long max_pages; /* high-water mark of num_pages */
+#endif
+};
+
+/* State of the single bounce client; see g_dmabounce_device_info. */
+struct dmabounce_device_info {
+ struct device *dev; /* device passed to brcm_pcie_bounce_init() */
+ dma_addr_t threshold; /* mappings ending above this address are bounced */
+ struct list_head safe_buffers; /* list of live struct safe_buffer */
+ struct dmabounce_pool pool; /* backing store for bounce regions */
+ rwlock_t lock; /* held while walking or editing safe_buffers */
+#ifdef STATS
+ unsigned long map_count; /* bounced map_page calls */
+ unsigned long unmap_count; /* bounced unmap_page calls */
+ unsigned long sync_dev_count; /* bounced sync_single_for_device calls */
+ unsigned long sync_cpu_count; /* bounced sync_single_for_cpu calls */
+ unsigned long fail_count; /* map_page calls that could not get a buffer */
+ int attr_res; /* result of creating the dmabounce_stats attribute */
+#endif
+};
+
+static struct dmabounce_device_info *g_dmabounce_device_info;
+
+extern int bcm2838_dma40_memcpy_init(void);
+extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
+
+#ifdef STATS
+/*
+ * sysfs "dmabounce_stats" show handler: prints the map/unmap, sync, failure
+ * and pool usage counters on a single line.
+ *
+ * The attribute is created before g_dmabounce_device_info is published and
+ * removed after it has been cleared, so a read can arrive while the pointer
+ * is NULL; report -ENODEV instead of dereferencing it.
+ */
+static ssize_t
+bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+
+	if (!device_info)
+		return -ENODEV;
+
+	/* sysfs provides a PAGE_SIZE buffer; bound the write to it */
+	return scnprintf(buf, PAGE_SIZE,
+			 "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
+			 device_info->map_count,
+			 device_info->unmap_count,
+			 device_info->sync_dev_count,
+			 device_info->sync_cpu_count,
+			 device_info->fail_count,
+			 device_info->pool.max_size,
+			 device_info->pool.num_bufs,
+			 device_info->pool.max_bufs,
+			 device_info->pool.num_pages * PAGE_SIZE,
+			 device_info->pool.max_pages * PAGE_SIZE);
+}
+
+static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
+#endif
+
+/*
+ * Initialise @pool: allocate the page bitmap and a coherent DMA buffer of
+ * @buffer_size bytes (rounded up to whole pages) on behalf of @dev, then
+ * reset the allocator state and statistics. Returns 0 on success or -ENOMEM
+ * if either allocation fails.
+ */
+static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
+			 unsigned long buffer_size)
+{
+	pool->pages = (buffer_size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
+	if (!pool->alloc_map)
+		return -ENOMEM;
+
+	pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
+					     &pool->dma_addr, GFP_KERNEL);
+	if (!pool->virt_addr) {
+		/* undo the bitmap allocation before reporting failure */
+		bitmap_free(pool->alloc_map);
+		return -ENOMEM;
+	}
+
+	pool->alloc_pos = 0;
+	spin_lock_init(&pool->lock);
+	pool->dev = dev;
+	pool->num_pages = 0;
+
+	DO_STATS(pool->max_size = 0);
+	DO_STATS(pool->num_bufs = 0);
+	DO_STATS(pool->max_bufs = 0);
+	DO_STATS(pool->max_pages = 0);
+
+	return 0;
+}
+
+/* Release the coherent buffer and the page bitmap owned by @pool. */
+static void bounce_destroy(struct dmabounce_pool *pool)
+{
+	size_t pool_bytes = pool->pages * PAGE_SIZE;
+
+	dma_free_coherent(pool->dev, pool_bytes, pool->virt_addr,
+			  pool->dma_addr);
+	bitmap_free(pool->alloc_map);
+}
+
+/*
+ * Carve a page-granular region of at least @size bytes out of @pool.
+ * The search starts at pool->alloc_pos and, if that fails, is retried from
+ * the start of the bitmap. On success the region's DMA address is stored in
+ * *@dmaaddrp and its CPU address is returned; NULL is returned when no
+ * sufficiently long run of free pages exists.
+ */
+static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
+ dma_addr_t *dmaaddrp)
+{
+ unsigned long pages;
+ unsigned long flags;
+ unsigned long pos;
+
+ /* round the request up to whole pages */
+ pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
+
+ /* NOTE(review): this statistic is updated before pool->lock is taken */
+ DO_STATS(pool->max_size = max(size, pool->max_size));
+
+ spin_lock_irqsave(&pool->lock, flags);
+ pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
+ pool->alloc_pos, pages, 0);
+ /* If not found, try from the start */
+ if (pos >= pool->pages && pool->alloc_pos)
+ pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
+ 0, pages, 0);
+
+ if (pos >= pool->pages) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ return NULL;
+ }
+
+ bitmap_set(pool->alloc_map, pos, pages);
+ /* the next search resumes just past this region, wrapping at the end */
+ pool->alloc_pos = (pos + pages) % pool->pages;
+ pool->num_pages += pages;
+
+ DO_STATS(pool->num_bufs++);
+ DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
+ DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
+
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
+
+ return pool->virt_addr + pos * PAGE_SIZE;
+}
+
+/*
+ * Return the region at @buf, originally allocated for @size bytes, to @pool.
+ * @buf must be page-aligned relative to the start of the pool. Once the pool
+ * is completely empty the search position is rewound to the start.
+ */
+static void
+bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
+{
+	unsigned long offset = buf - pool->virt_addr;
+	unsigned long npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	unsigned long first = offset / PAGE_SIZE;
+	unsigned long irqflags;
+
+	BUG_ON(offset & (PAGE_SIZE - 1));
+
+	spin_lock_irqsave(&pool->lock, irqflags);
+	bitmap_clear(pool->alloc_map, first, npages);
+	pool->num_pages -= npages;
+	if (!pool->num_pages)
+		pool->alloc_pos = 0;
+	DO_STATS(pool->num_bufs--);
+	spin_unlock_irqrestore(&pool->lock, irqflags);
+}
+
+/*
+ * allocate a 'safe' buffer and keep track of it
+ *
+ * Records the caller's @dma_addr, @size and @dir in a new safe_buffer, takes
+ * a bounce region of @size bytes from the device's pool and links the entry
+ * onto device_info->safe_buffers. Returns NULL (after logging a warning) if
+ * either the descriptor or the bounce region cannot be allocated.
+ */
+static struct safe_buffer *
+alloc_safe_buffer(struct dmabounce_device_info *device_info,
+ dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
+{
+ struct safe_buffer *buf;
+ struct dmabounce_pool *pool = &device_info->pool;
+ struct device *dev = device_info->dev;
+ unsigned long flags;
+
+ /*
+ * Although one might expect this to be called in thread context,
+ * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
+ * was previously used to select the appropriate allocation mode,
+ * but this is unsafe.
+ */
+ buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
+ if (!buf) {
+ dev_warn(dev, "%s: kmalloc failed\n", __func__);
+ return NULL;
+ }
+
+ buf->unsafe_dma_addr = dma_addr;
+ buf->size = size;
+ buf->direction = dir;
+ buf->pool = pool;
+
+ buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
+
+ if (!buf->safe) {
+ dev_warn(dev,
+ "%s: could not alloc dma memory (size=%zu)\n",
+ __func__, size);
+ kfree(buf);
+ return NULL;
+ }
+
+ /* publish the mapping so later unmap/sync calls can find it */
+ write_lock_irqsave(&device_info->lock, flags);
+ list_add(&buf->node, &device_info->safe_buffers);
+ write_unlock_irqrestore(&device_info->lock, flags);
+
+ return buf;
+}
+
+/*
+ * determine if a buffer is from our "safe" pool
+ *
+ * Returns the tracked safe_buffer whose range
+ * [safe_dma_addr, safe_dma_addr + size) contains @safe_dma_addr, or NULL if
+ * no tracked buffer covers that address.
+ *
+ * NOTE(review): the entry is returned after the read lock has been dropped;
+ * nothing in this function keeps it alive against a concurrent
+ * free_safe_buffer() - confirm that callers serialise map/unmap per buffer.
+ */
+static struct safe_buffer *
+find_safe_buffer(struct dmabounce_device_info *device_info,
+ dma_addr_t safe_dma_addr)
+{
+ struct safe_buffer *b, *rb = NULL;
+ unsigned long flags;
+
+ read_lock_irqsave(&device_info->lock, flags);
+
+ list_for_each_entry(b, &device_info->safe_buffers, node)
+ if (b->safe_dma_addr <= safe_dma_addr &&
+ b->safe_dma_addr + b->size > safe_dma_addr) {
+ rb = b;
+ break;
+ }
+
+ read_unlock_irqrestore(&device_info->lock, flags);
+ return rb;
+}
+
+/*
+ * Unlink @buf from device_info->safe_buffers, hand its bounce region back to
+ * the pool it came from and free the descriptor.
+ */
+static void
+free_safe_buffer(struct dmabounce_device_info *device_info,
+ struct safe_buffer *buf)
+{
+ unsigned long flags;
+
+ write_lock_irqsave(&device_info->lock, flags);
+ list_del(&buf->node);
+ write_unlock_irqrestore(&device_info->lock, flags);
+
+ /* the pool has its own lock, so this is done outside the list lock */
+ bounce_free(buf->pool, buf->safe, buf->size);
+
+ kfree(buf);
+}
+
+/* ************************************************** */
+
+/*
+ * Look up the bounce buffer covering @dma_addr in the global bounce state.
+ * Returns NULL when @dev is NULL, when no bounce state exists, or when
+ * @dma_addr is a mapping-error value; in the last case an error naming the
+ * operation @where is logged.
+ */
+static struct safe_buffer *
+find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
+{
+ if (!dev || !g_dmabounce_device_info)
+ return NULL;
+ if (dma_mapping_error(dev, dma_addr)) {
+ dev_err(dev, "Trying to %s invalid mapping\n", where);
+ return NULL;
+ }
+ return find_safe_buffer(g_dmabounce_device_info, dma_addr);
+}
+
+/*
+ * Prepare bounce buffer @buf for device access. For DMA_TO_DEVICE and
+ * DMA_BIDIRECTIONAL the caller's data is copied into the bounce region with
+ * bcm2838_dma40_memcpy(), unless DMA_ATTR_SKIP_CPU_SYNC is set in @attrs.
+ * @size and @dir must match the values recorded when @buf was allocated.
+ * Returns the DMA address of the bounce region.
+ */
+static dma_addr_t
+map_single(struct device *dev, struct safe_buffer *buf, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ BUG_ON(buf->size != size);
+ BUG_ON(buf->direction != dir);
+
+ dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
+ (u64)buf->safe_dma_addr);
+
+ if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
+ size);
+
+ return buf->safe_dma_addr;
+}
+
+/*
+ * Hand bounce buffer @buf back to the CPU. For DMA_FROM_DEVICE and
+ * DMA_BIDIRECTIONAL the bounce region is copied back to the caller's buffer
+ * with bcm2838_dma40_memcpy(), unless DMA_ATTR_SKIP_CPU_SYNC is set in
+ * @attrs. @size and @dir must match the values recorded when @buf was
+ * allocated. Returns the DMA address of the caller's original buffer.
+ */
+static dma_addr_t
+unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ BUG_ON(buf->size != size);
+ BUG_ON(buf->direction != dir);
+
+ if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
+ (u64)buf->unsafe_dma_addr);
+
+ bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
+ size);
+ }
+ return buf->unsafe_dma_addr;
+}
+
+/* ************************************************** */
+
+/*
+ * see if a buffer address is in an 'unsafe' range.  if it is
+ * allocate a 'safe' buffer and copy the unsafe buffer into it.
+ * substitute the safe buffer for the unsafe one.
+ * (basically move the buffer from an unsafe area to a safe one)
+ *
+ * A mapping is treated as unsafe when it ends above the configured
+ * threshold. Returns the DMA address the device should use, or
+ * DMA_MAPPING_ERROR when no bounce buffer could be allocated.
+ */
+static dma_addr_t
+dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
+		   size_t size, enum dma_data_direction dir,
+		   unsigned long attrs)
+{
+	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+	dma_addr_t dma_addr;
+
+	dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
+
+	/* make the caller's data visible to DMA before any copy is started */
+	dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
+	if (!dev_is_dma_coherent(dev))
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+
+	if (device_info && (dma_addr + size) > device_info->threshold) {
+		struct safe_buffer *buf;
+
+		buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
+		if (!buf) {
+			DO_STATS(device_info->fail_count++);
+			/* the sentinel that dma_mapping_error() tests for */
+			return DMA_MAPPING_ERROR;
+		}
+
+		DO_STATS(device_info->map_count++);
+
+		dma_addr = map_single(dev, buf, size, dir, attrs);
+	}
+	return dma_addr;
+}
+
+/*
+ * see if a mapped address was really a "safe" buffer and if so, copy
+ * the data from the safe buffer back to the unsafe buffer and free up
+ * the safe buffer. (basically return things back to the way they
+ * should be)
+ *
+ * When a bounce buffer was found, dma_addr is replaced by the caller's
+ * original address, so the cache maintenance below acts on the original
+ * buffer rather than on the bounce region.
+ */
+static void
+dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct safe_buffer *buf;
+
+ buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+ if (buf) {
+ DO_STATS(g_dmabounce_device_info->unmap_count++);
+ dma_addr = unmap_single(dev, buf, size, dir, attrs);
+ free_safe_buffer(g_dmabounce_device_info, buf);
+ }
+
+ if (!dev_is_dma_coherent(dev))
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+ dma_direct_sync_single_for_cpu(dev, dma_addr, size, dir);
+}
+
+/*
+ * A version of dmabounce_map_page that assumes the mapping has already
+ * been created - intended for streaming operation.
+ *
+ * Cache maintenance is performed on @dma_addr as passed in; if that address
+ * belongs to a tracked bounce buffer, the copy into the bounce region is
+ * repeated via map_single() with no attributes set.
+ */
+static void
+dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ struct safe_buffer *buf;
+
+ dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
+ if (!dev_is_dma_coherent(dev))
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+
+ buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+ if (buf) {
+ DO_STATS(g_dmabounce_device_info->sync_dev_count++);
+ map_single(dev, buf, size, dir, 0);
+ }
+}
+
+/*
+ * A version of dmabounce_unmap_page that doesn't destroy the mapping -
+ * intended for streaming operation.
+ *
+ * When @dma_addr belongs to a tracked bounce buffer, the data is copied
+ * back via unmap_single() and the cache maintenance below is applied to the
+ * caller's original buffer; the bounce buffer itself stays allocated.
+ */
+static void
+dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ struct safe_buffer *buf;
+
+ buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+ if (buf) {
+ DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
+ dma_addr = unmap_single(dev, buf, size, dir, 0);
+ }
+
+ if (!dev_is_dma_coherent(dev))
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+ dma_direct_sync_single_for_cpu(dev, dma_addr, size, dir);
+}
+
+/*
+ * dma_supported hook: returns 0 for every mask once the global bounce state
+ * exists, otherwise defers to dma_direct_supported().
+ *
+ * NOTE(review): reporting "not supported" while bouncing is active looks
+ * surprising - presumably inherited from arch/arm/common/dmabounce.c;
+ * confirm this is the intended answer for devices using these ops.
+ */
+static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
+{
+ if (g_dmabounce_device_info)
+ return 0;
+
+ return dma_direct_supported(dev, dma_mask);
+}
+
+/*
+ * DMA operations installed by brcm_pcie_bounce_register_dev(). Single-page
+ * map/unmap/sync go through the bounce handlers above; allocation and the
+ * scatter-gather entries point straight at the dma_direct helpers.
+ *
+ * NOTE(review): because the *_sg entries are the dma_direct functions,
+ * scatter-gather mappings appear to bypass the bounce path - confirm.
+ */
+static const struct dma_map_ops dmabounce_ops = {
+ .alloc = dma_direct_alloc,
+ .free = dma_direct_free,
+ .map_page = dmabounce_map_page,
+ .unmap_page = dmabounce_unmap_page,
+ .sync_single_for_cpu = dmabounce_sync_for_cpu,
+ .sync_single_for_device = dmabounce_sync_for_device,
+ .map_sg = dma_direct_map_sg,
+ .unmap_sg = dma_direct_unmap_sg,
+ .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
+ .sync_sg_for_device = dma_direct_sync_sg_for_device,
+ .dma_supported = dmabounce_dma_supported,
+};
+
+/**
+ * brcm_pcie_bounce_init() - create the single global bounce-buffer pool
+ * @dev: device used for the coherent pool allocation, log messages and the
+ *       dmabounce_stats sysfs attribute
+ * @buffer_size: pool size in bytes (rounded up to whole pages)
+ * @threshold: mappings that end above this DMA address are bounced
+ *
+ * Return: 0 on success, -EBUSY if a pool already exists, -ENOMEM if an
+ * allocation fails, or the error from bcm2838_dma40_memcpy_init().
+ */
+int brcm_pcie_bounce_init(struct device *dev,
+			  unsigned long buffer_size,
+			  dma_addr_t threshold)
+{
+	struct dmabounce_device_info *device_info;
+	int ret;
+
+	/* Only support a single client */
+	if (g_dmabounce_device_info)
+		return -EBUSY;
+
+	ret = bcm2838_dma40_memcpy_init();
+	if (ret)
+		return ret;
+
+	/*
+	 * bounce_create() below already allocates with GFP_KERNEL, so this
+	 * path may sleep and has no need to dip into the atomic reserves.
+	 */
+	device_info = kmalloc(sizeof(*device_info), GFP_KERNEL);
+	if (!device_info) {
+		dev_err(dev,
+			"Could not allocate dmabounce_device_info\n");
+		return -ENOMEM;
+	}
+
+	ret = bounce_create(&device_info->pool, dev, buffer_size);
+	if (ret) {
+		dev_err(dev,
+			"dmabounce: could not allocate %lu byte DMA pool\n",
+			buffer_size);
+		goto err_bounce;
+	}
+
+	device_info->dev = dev;
+	device_info->threshold = threshold;
+	INIT_LIST_HEAD(&device_info->safe_buffers);
+	rwlock_init(&device_info->lock);
+
+	DO_STATS(device_info->map_count = 0);
+	DO_STATS(device_info->unmap_count = 0);
+	DO_STATS(device_info->sync_dev_count = 0);
+	DO_STATS(device_info->sync_cpu_count = 0);
+	DO_STATS(device_info->fail_count = 0);
+	DO_STATS(device_info->attr_res =
+		 device_create_file(dev, &dev_attr_dmabounce_stats));
+
+	g_dmabounce_device_info = device_info;
+
+	/* success banner: informational, not an error */
+	dev_info(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
+		 buffer_size / 1024, &threshold);
+
+	return 0;
+
+ err_bounce:
+	kfree(device_info);
+	return ret;
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_init);
+
+/*
+ * Tear down the global bounce pool created by brcm_pcie_bounce_init().
+ * @dev is used for log messages and for removing the dmabounce_stats
+ * attribute. Warns and returns if no pool exists; BUG()s if bounce mappings
+ * are still outstanding.
+ */
+void brcm_pcie_bounce_uninit(struct device *dev)
+{
+	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+
+	/* unpublish first so new lookups no longer see the pool */
+	g_dmabounce_device_info = NULL;
+
+	if (!device_info) {
+		dev_warn(dev,
+			 "Never registered with dmabounce but attempting to unregister!\n");
+		return;
+	}
+
+	if (!list_empty(&device_info->safe_buffers)) {
+		dev_err(dev,
+			"Removing from dmabounce with pending buffers!\n");
+		BUG();
+	}
+
+	bounce_destroy(&device_info->pool);
+
+	/* only remove the attribute if creating it succeeded */
+	DO_STATS(if (device_info->attr_res == 0)
+			 device_remove_file(dev, &dev_attr_dmabounce_stats));
+
+	kfree(device_info);
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
+
+/*
+ * Install the bounce-buffer DMA operations on @dev. Always returns 0.
+ */
+int brcm_pcie_bounce_register_dev(struct device *dev)
+{
+ set_dma_ops(dev, &dmabounce_ops);
+
+ return 0;
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
+
+MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
+MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
+MODULE_LICENSE("GPL");
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -611,28 +611,6 @@ static const struct dma_map_ops brcm_dma
static void brcm_set_dma_ops(struct device *dev)
{
- int ret;
-
- if (IS_ENABLED(CONFIG_ARM64)) {
- /*
- * We are going to invoke get_dma_ops(). That
- * function, at this point in time, invokes
- * get_arch_dma_ops(), and for ARM64 that function
- * returns a pointer to dummy_dma_ops. So then we'd
- * like to call arch_setup_dma_ops(), but that isn't
- * exported. Instead, we call of_dma_configure(),
- * which is exported, and this calls
- * arch_setup_dma_ops(). Once we do this the call to
- * get_dma_ops() will work properly because
- * dev->dma_ops will be set.
- */
- ret = of_dma_configure(dev, dev->of_node, true);
- if (ret) {
- dev_err(dev, "of_dma_configure() failed: %d\n", ret);
- return;
- }
- }
-
arch_dma_ops = get_dma_ops(dev);
if (!arch_dma_ops) {
dev_err(dev, "failed to get arch_dma_ops\n");
@@ -651,12 +629,12 @@ static int brcmstb_platform_notifier(str
extern unsigned long max_pfn;
struct device *dev = __dev;
const char *rc_name = "0000:00:00.0";
+ int ret;
switch (event) {
case BUS_NOTIFY_ADD_DEVICE:
if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
strcmp(dev->kobj.name, rc_name)) {
- int ret;
ret = brcm_pcie_bounce_register_dev(dev);
if (ret) {
@@ -665,8 +643,14 @@ static int brcmstb_platform_notifier(str
ret);
return ret;
}
- brcm_set_dma_ops(dev);
+ } else if (IS_ENABLED(CONFIG_ARM64)) {
+ ret = of_dma_configure(dev, dev->of_node, true);
+ if (ret) {
+ dev_err(dev, "of_dma_configure() failed: %d\n", ret);
+ return ret;
+ }
}
+ brcm_set_dma_ops(dev);
return NOTIFY_OK;
case BUS_NOTIFY_DEL_DEVICE:

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save