#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
* so it's less likely to be used soon and more likely to be merged
* as a higher order page
*/
- if ((order < MAX_ORDER-1) && pfn_valid_within(page_to_pfn(buddy))) {
+ if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
struct page *higher_page, *higher_buddy;
combined_idx = __find_combined_index(page_idx, order);
higher_page = page + combined_idx - page_idx;
{
int migratetype = 0;
int batch_free = 0;
+ int to_free = count;
spin_lock(&zone->lock);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
- __mod_zone_page_state(zone, NR_FREE_PAGES, count);
- while (count) {
+ while (to_free) {
struct page *page;
struct list_head *list;
/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
__free_one_page(page, zone, 0, page_private(page));
trace_mm_page_pcpu_drain(page, 0, page_private(page));
- } while (--count && --batch_free && !list_empty(list));
+ } while (--to_free && --batch_free && !list_empty(list));
}
+ __mod_zone_page_state(zone, NR_FREE_PAGES, count);
spin_unlock(&zone->lock);
}
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
- __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
__free_one_page(page, zone, order, migratetype);
+ __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
spin_unlock(&zone->lock);
}
{
/* free_pages my go negative - that's OK */
long min = mark;
- long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
+ long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
int o;
if (alloc_flags & ALLOC_HIGH)
struct page *page = NULL;
struct reclaim_state reclaim_state;
struct task_struct *p = current;
+ bool drained = false;
cond_resched();
cond_resched();
- if (order != 0)
- drain_all_pages();
+ if (unlikely(!(*did_some_progress)))
+ return NULL;
- if (likely(*did_some_progress))
- page = get_page_from_freelist(gfp_mask, nodemask, order,
+retry:
+ page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx,
alloc_flags, preferred_zone,
migratetype);
+
+ /*
+ * If an allocation failed after direct reclaim, it could be because
+ * pages are pinned on the per-cpu lists. Drain them and try again
+ */
+ if (!page && !drained) {
+ drain_all_pages();
+ drained = true;
+ goto retry;
+ }
+
return page;
}
preferred_zone, migratetype);
if (!page && gfp_mask & __GFP_NOFAIL)
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
} while (!page && (gfp_mask & __GFP_NOFAIL));
return page;
const gfp_t wait = gfp_mask & __GFP_WAIT;
/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
- BUILD_BUG_ON(__GFP_HIGH != ALLOC_HIGH);
+ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
/*
* The caller may dip into page reserves a bit more if the caller
* policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
* set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
*/
- alloc_flags |= (gfp_mask & __GFP_HIGH);
+ alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
if (!wait) {
alloc_flags |= ALLOC_HARDER;
pages_reclaimed += did_some_progress;
if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
/* Wait for some write requests to complete then retry */
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
goto rebalance;
}
" all_unreclaimable? %s"
"\n",
zone->name,
- K(zone_page_state(zone, NR_FREE_PAGES)),
+ K(zone_nr_free_pages(zone)),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
}
}
+#ifdef CONFIG_HAVE_MEMBLOCK
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+ u64 goal, u64 limit)
+{
+ int i;
+
+ /* Need to go over early_node_map to find out good range for node */
+ for_each_active_range_index_in_nid(i, nid) {
+ u64 addr;
+ u64 ei_start, ei_last;
+ u64 final_start, final_end;
+
+ ei_last = early_node_map[i].end_pfn;
+ ei_last <<= PAGE_SHIFT;
+ ei_start = early_node_map[i].start_pfn;
+ ei_start <<= PAGE_SHIFT;
+
+ final_start = max(ei_start, goal);
+ final_end = min(ei_last, limit);
+
+ if (final_start >= final_end)
+ continue;
+
+ addr = memblock_find_in_range(final_start, final_end, size, align);
+
+ if (addr == MEMBLOCK_ERROR)
+ continue;
+
+ return addr;
+ }
+
+ return MEMBLOCK_ERROR;
+}
+#endif
+
int __init add_from_early_node_map(struct range *range, int az,
int nr_range, int nid)
{
void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit)
{
- int i;
void *ptr;
+ u64 addr;
- if (limit > get_max_mapped())
- limit = get_max_mapped();
-
- /* need to go over early_node_map to find out good range for node */
- for_each_active_range_index_in_nid(i, nid) {
- u64 addr;
- u64 ei_start, ei_last;
-
- ei_last = early_node_map[i].end_pfn;
- ei_last <<= PAGE_SHIFT;
- ei_start = early_node_map[i].start_pfn;
- ei_start <<= PAGE_SHIFT;
- addr = find_early_area(ei_start, ei_last,
- goal, limit, size, align);
+ if (limit > memblock.current_limit)
+ limit = memblock.current_limit;
- if (addr == -1ULL)
- continue;
+ addr = find_memory_core_early(nid, size, align, goal, limit);
-#if 0
- printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
- nid,
- ei_start, ei_last, goal, limit, size,
- align, addr);
-#endif
-
- ptr = phys_to_virt(addr);
- memset(ptr, 0, size);
- reserve_early_without_check(addr, addr + size, "BOOTMEM");
- /*
- * The min_count is set to 0 so that bootmem allocated blocks
- * are never reported as leaks.
- */
- kmemleak_alloc(ptr, size, 0, 0);
- return ptr;
- }
+ if (addr == MEMBLOCK_ERROR)
+ return NULL;
- return NULL;
+ ptr = phys_to_virt(addr);
+ memset(ptr, 0, size);
+ memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
+ /*
+ * The min_count is set to 0 so that bootmem allocated blocks
+ * are never reported as leaks.
+ */
+ kmemleak_alloc(ptr, size, 0, 0);
+ return ptr;
}
#endif
if (!table)
panic("Failed to allocate %s hash table\n", tablename);
- printk(KERN_INFO "%s hash table entries: %d (order: %d, %lu bytes)\n",
+ printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n",
tablename,
- (1U << log2qty),
+ (1UL << log2qty),
ilog2(size) - PAGE_SHIFT,
size);
* page allocater never alloc memory from ISOLATE block.
*/
+static int
+__count_immobile_pages(struct zone *zone, struct page *page, int count)
+{
+ unsigned long pfn, iter, found;
+ /*
+ * For avoiding noise data, lru_add_drain_all() should be called
+ * If ZONE_MOVABLE, the zone never contains immobile pages
+ */
+ if (zone_idx(zone) == ZONE_MOVABLE)
+ return true;
+
+ if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE)
+ return true;
+
+ pfn = page_to_pfn(page);
+ for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+ unsigned long check = pfn + iter;
+
+ if (!pfn_valid_within(check)) {
+ iter++;
+ continue;
+ }
+ page = pfn_to_page(check);
+ if (!page_count(page)) {
+ if (PageBuddy(page))
+ iter += (1 << page_order(page)) - 1;
+ continue;
+ }
+ if (!PageLRU(page))
+ found++;
+ /*
+ * If there are RECLAIMABLE pages, we need to check it.
+ * But now, memory offline itself doesn't call shrink_slab()
+ * and it still to be fixed.
+ */
+ /*
+ * If the page is not RAM, page_count()should be 0.
+ * we don't need more check. This is an _used_ not-movable page.
+ *
+ * The problematic thing here is PG_reserved pages. PG_reserved
+ * is set to both of a memory hole page and a _used_ kernel
+ * page at boot.
+ */
+ if (found > count)
+ return false;
+ }
+ return true;
+}
+
+bool is_pageblock_removable_nolock(struct page *page)
+{
+ struct zone *zone = page_zone(page);
+ return __count_immobile_pages(zone, page, 0);
+}
+
int set_migratetype_isolate(struct page *page)
{
struct zone *zone;
- struct page *curr_page;
- unsigned long flags, pfn, iter;
- unsigned long immobile = 0;
+ unsigned long flags, pfn;
struct memory_isolate_notify arg;
int notifier_ret;
int ret = -EBUSY;
zone_idx = zone_idx(zone);
spin_lock_irqsave(&zone->lock, flags);
- if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
- zone_idx == ZONE_MOVABLE) {
- ret = 0;
- goto out;
- }
pfn = page_to_pfn(page);
arg.start_pfn = pfn;
*/
notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
notifier_ret = notifier_to_errno(notifier_ret);
- if (notifier_ret || !arg.pages_found)
+ if (notifier_ret)
goto out;
-
- for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
- if (!pfn_valid_within(pfn))
- continue;
-
- curr_page = pfn_to_page(iter);
- if (!page_count(curr_page) || PageLRU(curr_page))
- continue;
-
- immobile++;
- }
-
- if (arg.pages_found == immobile)
+ /*
+ * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
+ * We just check MOVABLE pages.
+ */
+ if (__count_immobile_pages(zone, page, arg.pages_found))
ret = 0;
+ /*
+ * immobile means "not-on-lru" paes. If immobile is larger than
+ * removable-by-driver pages reported by notifier, we'll fail.
+ */
+
out:
if (!ret) {
set_pageblock_migratetype(page, MIGRATE_ISOLATE);