ipv6: AF_INET6 link address family

[net-next-2.6.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index f12ad1836abe115b1b8e3bf4b9187c01249e8a30..07a654486f75cfdfe13035f7bf501e2d2dbf0a4a 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -21,6 +21,7 @@
  #include <linux/pagemap.h>
  #include <linux/jiffies.h>
  #include <linux/bootmem.h>
+#include <linux/memblock.h>
  #include <linux/compiler.h>
  #include <linux/kernel.h>
  #include <linux/kmemcheck.h>
@@ -530,7 +531,7 @@ static inline void __free_one_page(struct page *page,
          * so it's less likely to be used soon and more likely to be merged
          * as a higher order page
          */
-       if ((order < MAX_ORDER-1) && pfn_valid_within(page_to_pfn(buddy))) {
+       if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
                 struct page *higher_page, *higher_buddy;
                 combined_idx = __find_combined_index(page_idx, order);
                 higher_page = page + combined_idx - page_idx;
@@ -1906,7 +1907,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
                         preferred_zone, migratetype);
  
                 if (!page && gfp_mask & __GFP_NOFAIL)
-                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
         } while (!page && (gfp_mask & __GFP_NOFAIL));
  
         return page;
@@ -1931,7 +1932,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
         const gfp_t wait = gfp_mask & __GFP_WAIT;
  
         /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
-       BUILD_BUG_ON(__GFP_HIGH != ALLOC_HIGH);
+       BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
  
         /*
          * The caller may dip into page reserves a bit more if the caller
@@ -1939,7 +1940,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
          * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
          * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
          */
-       alloc_flags |= (gfp_mask & __GFP_HIGH);
+       alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
  
         if (!wait) {
                 alloc_flags |= ALLOC_HARDER;
@@ -2094,7 +2095,7 @@ rebalance:
         pages_reclaimed += did_some_progress;
         if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
                 /* Wait for some write requests to complete then retry */
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
+               wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
                 goto rebalance;
         }
  
@@ -3636,6 +3637,41 @@ void __init free_bootmem_with_active_regions(int nid,
         }
  }
  
+#ifdef CONFIG_HAVE_MEMBLOCK
+/*
+ * Look for @size bytes aligned to @align inside node @nid's active ranges,
+ * clamped to the @goal..@limit window.  Returns what memblock_find_in_range()
+ * found, or MEMBLOCK_ERROR when none of the node's ranges yields an address.
+ */
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+                                       u64 goal, u64 limit)
+{
+       int idx;
+
+       for_each_active_range_index_in_nid(idx, nid) {
+               u64 range_start, range_end;
+               u64 lo, hi;
+               u64 found;
+
+               /* Shift the u64 copies, not the pfn fields themselves. */
+               range_start = early_node_map[idx].start_pfn;
+               range_start <<= PAGE_SHIFT;
+               range_end = early_node_map[idx].end_pfn;
+               range_end <<= PAGE_SHIFT;
+
+               lo = max(range_start, goal);
+               hi = min(range_end, limit);
+               if (lo >= hi)
+                       continue;       /* nothing left after clamping */
+
+               found = memblock_find_in_range(lo, hi, size, align);
+               if (found != MEMBLOCK_ERROR)
+                       return found;
+       }
+       return MEMBLOCK_ERROR;
+}
+#endif
+
  int __init add_from_early_node_map(struct range *range, int az,
                                    int nr_range, int nid)
  {
@@ -3655,46 +3691,26 @@ int __init add_from_early_node_map(struct range *range, int az,
  void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
                                         u64 goal, u64 limit)
  {
-       int i;
         void *ptr;
+       u64 addr;
  
-       if (limit > get_max_mapped())
-               limit = get_max_mapped();
+       if (limit > memblock.current_limit)
+               limit = memblock.current_limit;
  
-       /* need to go over early_node_map to find out good range for node */
-       for_each_active_range_index_in_nid(i, nid) {
-               u64 addr;
-               u64 ei_start, ei_last;
+       addr = find_memory_core_early(nid, size, align, goal, limit);
  
-               ei_last = early_node_map[i].end_pfn;
-               ei_last <<= PAGE_SHIFT;
-               ei_start = early_node_map[i].start_pfn;
-               ei_start <<= PAGE_SHIFT;
-               addr = find_early_area(ei_start, ei_last,
-                                        goal, limit, size, align);
-
-               if (addr == -1ULL)
-                       continue;
-
-#if 0
-               printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
-                               nid,
-                               ei_start, ei_last, goal, limit, size,
-                               align, addr);
-#endif
-
-               ptr = phys_to_virt(addr);
-               memset(ptr, 0, size);
-               reserve_early_without_check(addr, addr + size, "BOOTMEM");
-               /*
-                * The min_count is set to 0 so that bootmem allocated blocks
-                * are never reported as leaks.
-                */
-               kmemleak_alloc(ptr, size, 0, 0);
-               return ptr;
-       }
+       if (addr == MEMBLOCK_ERROR)
+               return NULL;
  
-       return NULL;
+       ptr = phys_to_virt(addr);
+       memset(ptr, 0, size);
+       memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
+       /*
+        * The min_count is set to 0 so that bootmem allocated blocks
+        * are never reported as leaks.
+        */
+       kmemleak_alloc(ptr, size, 0, 0);
+       return ptr;
  }
  #endif
  
@@ -5281,12 +5297,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
   * page allocater never alloc memory from ISOLATE block.
   */
  
+/*
+ * Walk the pageblock_nr_pages pfns starting at @page and count in-use pages
+ * that are not on the LRU.  Returns false as soon as that count exceeds
+ * @count, true otherwise (including for movable zones and pageblocks).
+ */
+static int
+__count_immobile_pages(struct zone *zone, struct page *page, int count)
+{
+       unsigned long pfn, iter, found;
+       /*
+        * For avoiding noise data, lru_add_drain_all() should be called
+        * If ZONE_MOVABLE, the zone never contains immobile pages
+        */
+       if (zone_idx(zone) == ZONE_MOVABLE)
+               return true;
+
+       if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE)
+               return true;
+
+       pfn = page_to_pfn(page);
+       for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+               unsigned long check = pfn + iter;
+
+               /* Skip just this pfn; the loop header already advances iter. */
+               if (!pfn_valid_within(check))
+                       continue;
+               page = pfn_to_page(check);
+               if (!page_count(page)) {
+                       /* Jump over the remaining pages of a free buddy block. */
+                       if (PageBuddy(page))
+                               iter += (1 << page_order(page)) - 1;
+                       continue;
+               }
+               if (!PageLRU(page))
+                       found++;
+               /*
+                * RECLAIMABLE pages would need checking too, but memory
+                * offline does not call shrink_slab() itself yet.
+                * A non-RAM page should have page_count() == 0, so a counted
+                * page is a used, not-movable one.  PG_reserved is the
+                * problem case: it is set on both memory-hole pages and used
+                * kernel pages at boot.
+                */
+               if (found > count)
+                       return false;
+       }
+       return true;
+}
+
+/* Removable iff __count_immobile_pages() finds no immobile page at all. */
+bool is_pageblock_removable_nolock(struct page *page)
+{
+       return __count_immobile_pages(page_zone(page), page, 0);
+}
+
  int set_migratetype_isolate(struct page *page)
  {
         struct zone *zone;
-       struct page *curr_page;
-       unsigned long flags, pfn, iter;
-       unsigned long immobile = 0;
+       unsigned long flags, pfn;
         struct memory_isolate_notify arg;
         int notifier_ret;
         int ret = -EBUSY;
@@ -5296,11 +5365,6 @@ int set_migratetype_isolate(struct page *page)
         zone_idx = zone_idx(zone);
  
         spin_lock_irqsave(&zone->lock, flags);
-       if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
-           zone_idx == ZONE_MOVABLE) {
-               ret = 0;
-               goto out;
-       }
  
         pfn = page_to_pfn(page);
         arg.start_pfn = pfn;
@@ -5320,23 +5384,20 @@ int set_migratetype_isolate(struct page *page)
          */
         notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
         notifier_ret = notifier_to_errno(notifier_ret);
-       if (notifier_ret || !arg.pages_found)
+       if (notifier_ret)
                 goto out;
-
-       for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
-               if (!pfn_valid_within(pfn))
-                       continue;
-
-               curr_page = pfn_to_page(iter);
-               if (!page_count(curr_page) || PageLRU(curr_page))
-                       continue;
-
-               immobile++;
-       }
-
-       if (arg.pages_found == immobile)
+       /*
+        * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
+        * We just check MOVABLE pages.
+        */
+       if (__count_immobile_pages(zone, page, arg.pages_found))
                 ret = 0;
  
+       /*
+        * immobile means "not-on-lru" pages. If immobile is larger than
+        * removable-by-driver pages reported by notifier, we'll fail.
+        */
+
  out:
         if (!ret) {
                 set_pageblock_migratetype(page, MIGRATE_ISOLATE);