qlcnic: Add description for CN1000Q adapter

[net-next-2.6.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index a9649f4b261e6b3c01632939c46a77f19f447de1..07a654486f75cfdfe13035f7bf501e2d2dbf0a4a 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -21,6 +21,7 @@
  #include <linux/pagemap.h>
  #include <linux/jiffies.h>
  #include <linux/bootmem.h>
+#include <linux/memblock.h>
  #include <linux/compiler.h>
  #include <linux/kernel.h>
  #include <linux/kmemcheck.h>
@@ -530,7 +531,7 @@ static inline void __free_one_page(struct page *page,
          * so it's less likely to be used soon and more likely to be merged
          * as a higher order page
          */
-       if ((order < MAX_ORDER-1) && pfn_valid_within(page_to_pfn(buddy))) {
+       if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
                 struct page *higher_page, *higher_buddy;
                 combined_idx = __find_combined_index(page_idx, order);
                 higher_page = page + combined_idx - page_idx;
@@ -588,13 +589,13 @@ static void free_pcppages_bulk(struct zone *zone, int count,
  {
         int migratetype = 0;
         int batch_free = 0;
+       int to_free = count;
  
         spin_lock(&zone->lock);
         zone->all_unreclaimable = 0;
         zone->pages_scanned = 0;
  
-       __mod_zone_page_state(zone, NR_FREE_PAGES, count);
-       while (count) {
+       while (to_free) {
                 struct page *page;
                 struct list_head *list;
  
@@ -619,8 +620,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                         /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
                         __free_one_page(page, zone, 0, page_private(page));
                         trace_mm_page_pcpu_drain(page, 0, page_private(page));
-               } while (--count && --batch_free && !list_empty(list));
+               } while (--to_free && --batch_free && !list_empty(list));
         }
+       __mod_zone_page_state(zone, NR_FREE_PAGES, count);
         spin_unlock(&zone->lock);
  }
  
@@ -631,8 +633,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
         zone->all_unreclaimable = 0;
         zone->pages_scanned = 0;
  
-       __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
         __free_one_page(page, zone, order, migratetype);
+       __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
         spin_unlock(&zone->lock);
  }
  
@@ -1461,7 +1463,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
  {
         /* free_pages my go negative - that's OK */
         long min = mark;
-       long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
+       long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
         int o;
  
         if (alloc_flags & ALLOC_HIGH)
@@ -1846,6 +1848,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
         struct page *page = NULL;
         struct reclaim_state reclaim_state;
         struct task_struct *p = current;
+       bool drained = false;
  
         cond_resched();
  
@@ -1864,14 +1867,25 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
  
         cond_resched();
  
-       if (order != 0)
-               drain_all_pages();
+       if (unlikely(!(*did_some_progress)))
+               return NULL;
  
-       if (likely(*did_some_progress))
-               page = get_page_from_freelist(gfp_mask, nodemask, order,
+retry:
+       page = get_page_from_freelist(gfp_mask, nodemask, order,
                                         zonelist, high_zoneidx,
                                         alloc_flags, preferred_zone,
                                         migratetype);
+
+       /*
+        * If an allocation failed after direct reclaim, it could be because
+        * pages are pinned on the per-cpu lists. Drain them and try again
+        */
+       if (!page && !drained) {
+               drain_all_pages();
+               drained = true;
+               goto retry;
+       }
+
         return page;
  }
  
@@ -1893,7 +1907,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
                         preferred_zone, migratetype);
  
                 if (!page && gfp_mask & __GFP_NOFAIL)
-                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
         } while (!page && (gfp_mask & __GFP_NOFAIL));
  
         return page;
@@ -1918,7 +1932,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
         const gfp_t wait = gfp_mask & __GFP_WAIT;
  
         /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
-       BUILD_BUG_ON(__GFP_HIGH != ALLOC_HIGH);
+       BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
  
         /*
          * The caller may dip into page reserves a bit more if the caller
@@ -1926,7 +1940,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
          * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
          * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
          */
-       alloc_flags |= (gfp_mask & __GFP_HIGH);
+       alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
  
         if (!wait) {
                 alloc_flags |= ALLOC_HARDER;
@@ -2081,7 +2095,7 @@ rebalance:
         pages_reclaimed += did_some_progress;
         if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
                 /* Wait for some write requests to complete then retry */
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
+               wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
                 goto rebalance;
         }
  
@@ -2423,7 +2437,7 @@ void show_free_areas(void)
                         " all_unreclaimable? %s"
                         "\n",
                         zone->name,
-                       K(zone_page_state(zone, NR_FREE_PAGES)),
+                       K(zone_nr_free_pages(zone)),
                         K(min_wmark_pages(zone)),
                         K(low_wmark_pages(zone)),
                         K(high_wmark_pages(zone)),
@@ -3623,6 +3637,41 @@ void __init free_bootmem_with_active_regions(int nid,
         }
  }
  
+#ifdef CONFIG_HAVE_MEMBLOCK
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+                                       u64 goal, u64 limit)
+{
+       int i;
+
+       /* Need to go over early_node_map to find out good range for node */
+       for_each_active_range_index_in_nid(i, nid) {
+               u64 addr;
+               u64 ei_start, ei_last;
+               u64 final_start, final_end;
+
+               ei_last = early_node_map[i].end_pfn;
+               ei_last <<= PAGE_SHIFT;
+               ei_start = early_node_map[i].start_pfn;
+               ei_start <<= PAGE_SHIFT;
+
+               final_start = max(ei_start, goal);
+               final_end = min(ei_last, limit);
+
+               if (final_start >= final_end)
+                       continue;
+
+               addr = memblock_find_in_range(final_start, final_end, size, align);
+
+               if (addr == MEMBLOCK_ERROR)
+                       continue;
+
+               return addr;
+       }
+
+       return MEMBLOCK_ERROR;
+}
+#endif
+
  int __init add_from_early_node_map(struct range *range, int az,
                                    int nr_range, int nid)
  {
@@ -3642,46 +3691,26 @@ int __init add_from_early_node_map(struct range *range, int az,
  void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
                                         u64 goal, u64 limit)
  {
-       int i;
         void *ptr;
+       u64 addr;
  
-       if (limit > get_max_mapped())
-               limit = get_max_mapped();
-
-       /* need to go over early_node_map to find out good range for node */
-       for_each_active_range_index_in_nid(i, nid) {
-               u64 addr;
-               u64 ei_start, ei_last;
-
-               ei_last = early_node_map[i].end_pfn;
-               ei_last <<= PAGE_SHIFT;
-               ei_start = early_node_map[i].start_pfn;
-               ei_start <<= PAGE_SHIFT;
-               addr = find_early_area(ei_start, ei_last,
-                                        goal, limit, size, align);
+       if (limit > memblock.current_limit)
+               limit = memblock.current_limit;
  
-               if (addr == -1ULL)
-                       continue;
+       addr = find_memory_core_early(nid, size, align, goal, limit);
  
-#if 0
-               printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
-                               nid,
-                               ei_start, ei_last, goal, limit, size,
-                               align, addr);
-#endif
-
-               ptr = phys_to_virt(addr);
-               memset(ptr, 0, size);
-               reserve_early_without_check(addr, addr + size, "BOOTMEM");
-               /*
-                * The min_count is set to 0 so that bootmem allocated blocks
-                * are never reported as leaks.
-                */
-               kmemleak_alloc(ptr, size, 0, 0);
-               return ptr;
-       }
+       if (addr == MEMBLOCK_ERROR)
+               return NULL;
  
-       return NULL;
+       ptr = phys_to_virt(addr);
+       memset(ptr, 0, size);
+       memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
+       /*
+        * The min_count is set to 0 so that bootmem allocated blocks
+        * are never reported as leaks.
+        */
+       kmemleak_alloc(ptr, size, 0, 0);
+       return ptr;
  }
  #endif
  
@@ -5169,9 +5198,9 @@ void *__init alloc_large_system_hash(const char *tablename,
         if (!table)
                 panic("Failed to allocate %s hash table\n", tablename);
  
-       printk(KERN_INFO "%s hash table entries: %d (order: %d, %lu bytes)\n",
+       printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n",
                tablename,
-              (1U << log2qty),
+              (1UL << log2qty),
                ilog2(size) - PAGE_SHIFT,
                size);
  
@@ -5268,12 +5297,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
   * page allocater never alloc memory from ISOLATE block.
   */
  
+static int
+__count_immobile_pages(struct zone *zone, struct page *page, int count)
+{
+       unsigned long pfn, iter, found;
+       /*
+        * For avoiding noise data, lru_add_drain_all() should be called
+        * If ZONE_MOVABLE, the zone never contains immobile pages
+        */
+       if (zone_idx(zone) == ZONE_MOVABLE)
+               return true;
+
+       if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE)
+               return true;
+
+       pfn = page_to_pfn(page);
+       for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+               unsigned long check = pfn + iter;
+
+               if (!pfn_valid_within(check)) {
+                       iter++;
+                       continue;
+               }
+               page = pfn_to_page(check);
+               if (!page_count(page)) {
+                       if (PageBuddy(page))
+                               iter += (1 << page_order(page)) - 1;
+                       continue;
+               }
+               if (!PageLRU(page))
+                       found++;
+               /*
+                * If there are RECLAIMABLE pages, we need to check it.
+                * But now, memory offline itself doesn't call shrink_slab()
+                * and it still to be fixed.
+                */
+               /*
+                * If the page is not RAM, page_count()should be 0.
+                * we don't need more check. This is an _used_ not-movable page.
+                *
+                * The problematic thing here is PG_reserved pages. PG_reserved
+                * is set to both of a memory hole page and a _used_ kernel
+                * page at boot.
+                */
+               if (found > count)
+                       return false;
+       }
+       return true;
+}
+
+bool is_pageblock_removable_nolock(struct page *page)
+{
+       struct zone *zone = page_zone(page);
+       return __count_immobile_pages(zone, page, 0);
+}
+
  int set_migratetype_isolate(struct page *page)
  {
         struct zone *zone;
-       struct page *curr_page;
-       unsigned long flags, pfn, iter;
-       unsigned long immobile = 0;
+       unsigned long flags, pfn;
         struct memory_isolate_notify arg;
         int notifier_ret;
         int ret = -EBUSY;
@@ -5283,11 +5365,6 @@ int set_migratetype_isolate(struct page *page)
         zone_idx = zone_idx(zone);
  
         spin_lock_irqsave(&zone->lock, flags);
-       if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
-           zone_idx == ZONE_MOVABLE) {
-               ret = 0;
-               goto out;
-       }
  
         pfn = page_to_pfn(page);
         arg.start_pfn = pfn;
@@ -5307,23 +5384,20 @@ int set_migratetype_isolate(struct page *page)
          */
         notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
         notifier_ret = notifier_to_errno(notifier_ret);
-       if (notifier_ret || !arg.pages_found)
+       if (notifier_ret)
                 goto out;
-
-       for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
-               if (!pfn_valid_within(pfn))
-                       continue;
-
-               curr_page = pfn_to_page(iter);
-               if (!page_count(curr_page) || PageLRU(curr_page))
-                       continue;
-
-               immobile++;
-       }
-
-       if (arg.pages_found == immobile)
+       /*
+        * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
+        * We just check MOVABLE pages.
+        */
+       if (__count_immobile_pages(zone, page, arg.pages_found))
                 ret = 0;
  
+       /*
+        * immobile means "not-on-lru" paes. If immobile is larger than
+        * removable-by-driver pages reported by notifier, we'll fail.
+        */
+
  out:
         if (!ret) {
                 set_pageblock_migratetype(page, MIGRATE_ISOLATE);