DMAENGINE: ste_dma40: allocate LCLA dynamically

author Linus Walleij <linus.walleij@stericsson.com>

Sun, 20 Jun 2010 21:26:07 +0000 (21:26 +0000)

committer Dan Williams <dan.j.williams@intel.com>

Wed, 23 Jun 2010 01:01:55 +0000 (18:01 -0700)
author Linus Walleij <linus.walleij@stericsson.com>
Sun, 20 Jun 2010 21:26:07 +0000 (21:26 +0000)
committer Dan Williams <dan.j.williams@intel.com>
Wed, 23 Jun 2010 01:01:55 +0000 (18:01 -0700)
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c

index 8c46bb803dbbc02588a813701eb80dedf4892c6e..5748e96f00dea97d7786308463edbd85c06c5b65 100644 (file)
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -30,6 +30,12 @@
  /* Maximum iterations taken before giving up suspending a channel */
  #define D40_SUSPEND_MAX_IT 500
  
+/* Hardware requirement on LCLA alignment */
+#define LCLA_ALIGNMENT 0x40000
+/* Attempts before giving up trying to get pages that are aligned */
+#define MAX_LCLA_ALLOC_ATTEMPTS 256
+
+/* Bit markings for allocation map */
  #define D40_ALLOC_FREE         (1 << 31)
  #define D40_ALLOC_PHY          (1 << 30)
  #define D40_ALLOC_LOG_FREE     0
@@ -64,9 +70,9 @@ enum d40_command {
   */
  struct d40_lli_pool {
         void    *base;
-       int     size;
+       int      size;
         /* Space for dst and src, plus an extra for padding */
-       u8      pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
+       u8       pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
  };
  
  /**
@@ -111,18 +117,20 @@ struct d40_desc {
  /**
   * struct d40_lcla_pool - LCLA pool settings and data.
   *
- * @base: The virtual address of LCLA.
- * @phy: Physical base address of LCLA.
- * @base_size: size of lcla.
+ * @base: The virtual address of LCLA. 18 bit aligned.
+ * @base_unaligned: The original kmalloc pointer, if kmalloc is used.
+ * This pointer is only there for clean-up on error.
+ * @pages: The number of pages needed for all physical channels.
+ * Only used later for clean-up on error
   * @lock: Lock to protect the content in this struct.
- * @alloc_map: Mapping between physical channel and LCLA entries.
+ * @alloc_map: Bitmap mapping between physical channel and LCLA entries.
   * @num_blocks: The number of entries of alloc_map. Equals to the
   * number of physical channels.
   */
  struct d40_lcla_pool {
         void            *base;
-       dma_addr_t       phy;
-       resource_size_t  base_size;
+       void            *base_unaligned;
+       int              pages;
         spinlock_t       lock;
         u32             *alloc_map;
         int              num_blocks;
@@ -432,13 +440,12 @@ static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
  
  /* Support functions for logical channels */
  
-static int d40_lcla_id_get(struct d40_chan *d40c,
-                          struct d40_lcla_pool *pool)
+static int d40_lcla_id_get(struct d40_chan *d40c)
  {
         int src_id = 0;
         int dst_id = 0;
         struct d40_log_lli *lcla_lidx_base =
-               pool->base + d40c->phy_chan->num * 1024;
+               d40c->base->lcla_pool.base + d40c->phy_chan->num * 1024;
         int i;
         int lli_per_log = d40c->base->plat_data->llis_per_log;
         unsigned long flags;
@@ -446,24 +453,28 @@ static int d40_lcla_id_get(struct d40_chan *d40c,
         if (d40c->lcla.src_id >= 0 && d40c->lcla.dst_id >= 0)
                 return 0;
  
-       if (pool->num_blocks > 32)
+       if (d40c->base->lcla_pool.num_blocks > 32)
                 return -EINVAL;
  
-       spin_lock_irqsave(&pool->lock, flags);
+       spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
  
-       for (i = 0; i < pool->num_blocks; i++) {
-               if (!(pool->alloc_map[d40c->phy_chan->num] & (0x1 << i))) {
-                       pool->alloc_map[d40c->phy_chan->num] |= (0x1 << i);
+       for (i = 0; i < d40c->base->lcla_pool.num_blocks; i++) {
+               if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &
+                     (0x1 << i))) {
+                       d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |=
+                               (0x1 << i);
                         break;
                 }
         }
         src_id = i;
-       if (src_id >= pool->num_blocks)
+       if (src_id >= d40c->base->lcla_pool.num_blocks)
                 goto err;
  
-       for (; i < pool->num_blocks; i++) {
-               if (!(pool->alloc_map[d40c->phy_chan->num] & (0x1 << i))) {
-                       pool->alloc_map[d40c->phy_chan->num] |= (0x1 << i);
+       for (; i < d40c->base->lcla_pool.num_blocks; i++) {
+               if (!(d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &
+                     (0x1 << i))) {
+                       d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] |=
+                               (0x1 << i);
                         break;
                 }
         }
@@ -477,29 +488,13 @@ static int d40_lcla_id_get(struct d40_chan *d40c,
         d40c->lcla.dst = lcla_lidx_base + dst_id * lli_per_log + 1;
         d40c->lcla.src = lcla_lidx_base + src_id * lli_per_log + 1;
  
-
-       spin_unlock_irqrestore(&pool->lock, flags);
+       spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
         return 0;
  err:
-       spin_unlock_irqrestore(&pool->lock, flags);
+       spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
         return -EINVAL;
  }
  
-static void d40_lcla_id_put(struct d40_chan *d40c,
-                           struct d40_lcla_pool *pool,
-                           int id)
-{
-       unsigned long flags;
-       if (id < 0)
-               return;
-
-       d40c->lcla.src_id = -1;
-       d40c->lcla.dst_id = -1;
-
-       spin_lock_irqsave(&pool->lock, flags);
-       pool->alloc_map[d40c->phy_chan->num] &= (~(0x1 << id));
-       spin_unlock_irqrestore(&pool->lock, flags);
-}
  
  static int d40_channel_execute_command(struct d40_chan *d40c,
                                        enum d40_command command)
@@ -567,6 +562,7 @@ done:
  static void d40_term_all(struct d40_chan *d40c)
  {
         struct d40_desc *d40d;
+       unsigned long flags;
  
         /* Release active descriptors */
         while ((d40d = d40_first_active_get(d40c))) {
@@ -584,10 +580,17 @@ static void d40_term_all(struct d40_chan *d40c)
                 d40_desc_free(d40c, d40d);
         }
  
-       d40_lcla_id_put(d40c, &d40c->base->lcla_pool,
-                       d40c->lcla.src_id);
-       d40_lcla_id_put(d40c, &d40c->base->lcla_pool,
-                       d40c->lcla.dst_id);
+       spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
+
+       d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &=
+               (~(0x1 << d40c->lcla.dst_id));
+       d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num] &=
+               (~(0x1 << d40c->lcla.src_id));
+
+       d40c->lcla.src_id = -1;
+       d40c->lcla.dst_id = -1;
+
+       spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
  
         d40c->pending_tx = 0;
         d40c->busy = false;
@@ -703,7 +706,6 @@ static int d40_config_write(struct d40_chan *d40c)
  
  static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
  {
-
         if (d40d->lli_phy.dst && d40d->lli_phy.src) {
                 d40_phy_lli_write(d40c->base->virtbase,
                                   d40c->phy_chan->num,
@@ -712,13 +714,24 @@ static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
         } else if (d40d->lli_log.dst && d40d->lli_log.src) {
                 struct d40_log_lli *src = d40d->lli_log.src;
                 struct d40_log_lli *dst = d40d->lli_log.dst;
+               int s;
  
                 src += d40d->lli_count;
                 dst += d40d->lli_count;
-               d40_log_lli_write(d40c->lcpa, d40c->lcla.src,
-                                 d40c->lcla.dst,
-                                 dst, src,
-                                 d40c->base->plat_data->llis_per_log);
+               s = d40_log_lli_write(d40c->lcpa,
+                                     d40c->lcla.src, d40c->lcla.dst,
+                                     dst, src,
+                                     d40c->base->plat_data->llis_per_log);
+
+               /* If s equals to zero, the job is not linked */
+               if (s > 0) {
+                       (void) dma_map_single(d40c->base->dev, d40c->lcla.src,
+                                             s * sizeof(struct d40_log_lli),
+                                             DMA_TO_DEVICE);
+                       (void) dma_map_single(d40c->base->dev, d40c->lcla.dst,
+                                             s * sizeof(struct d40_log_lli),
+                                             DMA_TO_DEVICE);
+               }
         }
         d40d->lli_count += d40d->lli_tx_len;
  }
@@ -930,7 +943,8 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data)
                 if (!il[row].is_error)
                         dma_tc_handle(d40c);
                 else
-                       dev_err(base->dev, "[%s] IRQ chan: %ld offset %d idx %d\n",
+                       dev_err(base->dev,
+                               "[%s] IRQ chan: %ld offset %d idx %d\n",
                                 __func__, chan, il[row].offset, idx);
  
                 spin_unlock(&d40c->lock);
@@ -1089,7 +1103,8 @@ static int d40_allocate_channel(struct d40_chan *d40c)
         int j;
         int log_num;
         bool is_src;
-       bool is_log = (d40c->dma_cfg.channel_type & STEDMA40_CHANNEL_IN_OPER_MODE)
+       bool is_log = (d40c->dma_cfg.channel_type &
+                      STEDMA40_CHANNEL_IN_OPER_MODE)
                 == STEDMA40_CHANNEL_IN_LOG_MODE;
  
  
@@ -1124,8 +1139,10 @@ static int d40_allocate_channel(struct d40_chan *d40c)
                         for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
                                 int phy_num = j  + event_group * 2;
                                 for (i = phy_num; i < phy_num + 2; i++) {
-                                       if (d40_alloc_mask_set(&phys[i], is_src,
-                                                              0, is_log))
+                                       if (d40_alloc_mask_set(&phys[i],
+                                                              is_src,
+                                                              0,
+                                                              is_log))
                                                 goto found_phy;
                                 }
                         }
@@ -1396,13 +1413,14 @@ static u32 d40_residue(struct d40_chan *d40c)
         u32 num_elt;
  
         if (d40c->log_num != D40_PHY_CHAN)
-               num_elt = (readl(&d40c->lcpa->lcsp2) &  D40_MEM_LCSP2_ECNT_MASK)
+               num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
                         >> D40_MEM_LCSP2_ECNT_POS;
         else
                 num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE +
                                  d40c->phy_chan->num * D40_DREG_PCDELTA +
                                  D40_CHAN_REG_SDELT) &
-                          D40_SREG_ELEM_PHY_ECNT_MASK) >> D40_SREG_ELEM_PHY_ECNT_POS;
+                          D40_SREG_ELEM_PHY_ECNT_MASK) >>
+                       D40_SREG_ELEM_PHY_ECNT_POS;
         return num_elt * (1 << d40c->dma_cfg.dst_info.data_width);
  }
  
@@ -1455,8 +1473,10 @@ int stedma40_set_psize(struct dma_chan *chan,
         if (d40c->log_num != D40_PHY_CHAN) {
                 d40c->log_def.lcsp1 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK;
                 d40c->log_def.lcsp3 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK;
-               d40c->log_def.lcsp1 |= src_psize << D40_MEM_LCSP1_SCFG_PSIZE_POS;
-               d40c->log_def.lcsp3 |= dst_psize << D40_MEM_LCSP1_SCFG_PSIZE_POS;
+               d40c->log_def.lcsp1 |= src_psize <<
+                       D40_MEM_LCSP1_SCFG_PSIZE_POS;
+               d40c->log_def.lcsp3 |= dst_psize <<
+                       D40_MEM_LCSP1_SCFG_PSIZE_POS;
                 goto out;
         }
  
@@ -1521,8 +1541,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
                          * split list into 1-length and run only in lcpa
                          * space.
                          */
-                       if (d40_lcla_id_get(d40c,
-                                           &d40c->base->lcla_pool) != 0)
+                       if (d40_lcla_id_get(d40c) != 0)
                                 d40d->lli_tx_len = 1;
  
                 if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) {
@@ -1849,7 +1868,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d,
                  * If not, split list into 1-length and run only
                  * in lcpa space.
                  */
-               if (d40_lcla_id_get(d40c, &d40c->base->lcla_pool) != 0)
+               if (d40_lcla_id_get(d40c) != 0)
                         d40d->lli_tx_len = 1;
  
         if (direction == DMA_FROM_DEVICE)
@@ -2476,6 +2495,78 @@ static void __init d40_hw_init(struct d40_base *base)
  
  }
  
+static int __init d40_lcla_allocate(struct d40_base *base)
+{
+       unsigned long *page_list;
+       int i, j;
+       int ret = 0;
+
+       /*
+        * This is somewhat ugly. The LCLA area (1 KiB per physical channel)
+        * must be 18 bit aligned. To fulfil this hardware requirement without
+        * wasting 256 kb we allocate pages until we get an aligned one.
+        */
+       page_list = kmalloc(sizeof(unsigned long) * MAX_LCLA_ALLOC_ATTEMPTS,
+                           GFP_KERNEL);
+
+       if (!page_list) {
+               ret = -ENOMEM;
+               goto failure;
+       }
+
+       /* Pages needed. NOTE(review): passed as an *order* below - verify */
+       base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE;
+
+       for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) {
+               page_list[i] = __get_free_pages(GFP_KERNEL,
+                                               base->lcla_pool.pages);
+               if (!page_list[i]) {
+                       ret = -ENOMEM;
+                       dev_err(base->dev,
+                               "[%s] Failed to allocate %d pages.\n",
+                               __func__, base->lcla_pool.pages);
+
+                       for (j = 0; j < i; j++)
+                               free_pages(page_list[j], base->lcla_pool.pages);
+                       goto failure;
+               }
+
+               if ((virt_to_phys((void *)page_list[i]) &
+                    (LCLA_ALIGNMENT - 1)) == 0)
+                       break;
+       }
+
+       for (j = 0; j < i; j++)
+               free_pages(page_list[j], base->lcla_pool.pages);
+
+       if (i < MAX_LCLA_ALLOC_ATTEMPTS) {
+               base->lcla_pool.base = (void *)page_list[i];
+       } else {
+               /* After many attempts, no success with finding the correct
+                * alignment; fall back to allocating one big buffer */
+               dev_warn(base->dev,
+                        "[%s] Failed to get %d pages @ 18 bit align.\n",
+                        __func__, base->lcla_pool.pages);
+               base->lcla_pool.base_unaligned = kmalloc(SZ_1K *
+                                                        base->num_phy_chans +
+                                                        LCLA_ALIGNMENT,
+                                                        GFP_KERNEL);
+               if (!base->lcla_pool.base_unaligned) {
+                       ret = -ENOMEM;
+                       goto failure;
+               }
+
+               base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned,
+                                                LCLA_ALIGNMENT);
+       }
+
+       writel(virt_to_phys(base->lcla_pool.base),
+              base->virtbase + D40_DREG_LCLA);
+failure:
+       kfree(page_list);
+       return ret;
+}
+
  static int __init d40_probe(struct platform_device *pdev)
  {
         int err;
@@ -2535,41 +2626,11 @@ static int __init d40_probe(struct platform_device *pdev)
                         __func__);
                 goto failure;
         }
-       /* Get IO for logical channel link address */
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcla");
-       if (!res) {
-               ret = -ENOENT;
-               dev_err(&pdev->dev,
-                       "[%s] No \"lcla\" resource defined\n",
-                       __func__);
-               goto failure;
-       }
  
-       base->lcla_pool.base_size = resource_size(res);
-       base->lcla_pool.phy = res->start;
-
-       if (request_mem_region(res->start, resource_size(res),
-                              D40_NAME " I/O lcla") == NULL) {
-               ret = -EBUSY;
-               dev_err(&pdev->dev,
-                       "[%s] Failed to request LCLA region 0x%x-0x%x\n",
-                       __func__, res->start, res->end);
-               goto failure;
-       }
-       val = readl(base->virtbase + D40_DREG_LCLA);
-       if (res->start != val && val != 0) {
-               dev_warn(&pdev->dev,
-                        "[%s] Mismatch LCLA dma 0x%x, def 0x%x\n",
-                        __func__, val, res->start);
-       } else
-               writel(res->start, base->virtbase + D40_DREG_LCLA);
-
-       base->lcla_pool.base = ioremap(res->start, resource_size(res));
-       if (!base->lcla_pool.base) {
-               ret = -ENOMEM;
-               dev_err(&pdev->dev,
-                       "[%s] Failed to ioremap LCLA 0x%x-0x%x\n",
-                       __func__, res->start, res->end);
+       ret = d40_lcla_allocate(base);
+       if (ret) {
+               dev_err(&pdev->dev, "[%s] Failed to allocate LCLA area\n",
+                       __func__);
                 goto failure;
         }
  
@@ -2601,9 +2662,11 @@ failure:
                         kmem_cache_destroy(base->desc_slab);
                 if (base->virtbase)
                         iounmap(base->virtbase);
-               if (base->lcla_pool.phy)
-                       release_mem_region(base->lcla_pool.phy,
-                                          base->lcla_pool.base_size);
+               if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
+                       free_pages((unsigned long)base->lcla_pool.base,
+                                  base->lcla_pool.pages);
+               if (base->lcla_pool.base_unaligned)
+                       kfree(base->lcla_pool.base_unaligned);
                 if (base->phy_lcpa)
                         release_mem_region(base->phy_lcpa,
                                            base->lcpa_size);
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c

index 772636be13eb171a53f49e4b8045c8a179a9254f..d937f76d6e2e67a20a49838ed1a494c05edd6375 100644 (file)
--- a/drivers/dma/ste_dma40_ll.c
+++ b/drivers/dma/ste_dma40_ll.c
@@ -420,7 +420,7 @@ int d40_log_sg_to_lli(int lcla_id,
         return total_size;
  }
  
-void d40_log_lli_write(struct d40_log_lli_full *lcpa,
+int d40_log_lli_write(struct d40_log_lli_full *lcpa,
                        struct d40_log_lli *lcla_src,
                        struct d40_log_lli *lcla_dst,
                        struct d40_log_lli *lli_dst,
@@ -448,4 +448,7 @@ void d40_log_lli_write(struct d40_log_lli_full *lcpa,
                 slos = lli_src[i + 1].lcsp13 & D40_MEM_LCSP1_SLOS_MASK;
                 dlos = lli_dst[i + 1].lcsp13 & D40_MEM_LCSP3_DLOS_MASK;
         }
+
+       return i;
+
  }
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h

index c081f28ec1e30fb6a634e42e537d967627c94abd..9c0fa2f5fe570697768c23b703d2e7faf69e56f3 100644 (file)
--- a/drivers/dma/ste_dma40_ll.h
+++ b/drivers/dma/ste_dma40_ll.h
@@ -339,12 +339,12 @@ int d40_log_sg_to_dev(struct d40_lcla_elem *lcla,
                       bool term_int, dma_addr_t dev_addr, int max_len,
                       int llis_per_log);
  
-void d40_log_lli_write(struct d40_log_lli_full *lcpa,
-                      struct d40_log_lli *lcla_src,
-                      struct d40_log_lli *lcla_dst,
-                      struct d40_log_lli *lli_dst,
-                      struct d40_log_lli *lli_src,
-                      int llis_per_log);
+int d40_log_lli_write(struct d40_log_lli_full *lcpa,
+                     struct d40_log_lli *lcla_src,
+                     struct d40_log_lli *lcla_dst,
+                     struct d40_log_lli *lli_dst,
+                     struct d40_log_lli *lli_src,
+                     int llis_per_log);
  
  int d40_log_sg_to_lli(int lcla_id,
                       struct scatterlist *sg,
author	Linus Walleij <linus.walleij@stericsson.com>
	Sun, 20 Jun 2010 21:26:07 +0000 (21:26 +0000)
committer	Dan Williams <dan.j.williams@intel.com>
	Wed, 23 Jun 2010 01:01:55 +0000 (18:01 -0700)
drivers/dma/ste_dma40.c		patch \| blob \| blame \| history
drivers/dma/ste_dma40_ll.c		patch \| blob \| blame \| history
drivers/dma/ste_dma40_ll.h		patch \| blob \| blame \| history