From ed2f240094f900833ac06f533ab8bbcf0a1e8199 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Thu, 9 Oct 2014 15:26:31 -0700 Subject: memory-hotplug: add sysfs valid_zones attribute Currently memory-hotplug has two limits: 1. If the memory block is in ZONE_NORMAL, you can change it to ZONE_MOVABLE, but this memory block must be adjacent to ZONE_MOVABLE. 2. If the memory block is in ZONE_MOVABLE, you can change it to ZONE_NORMAL, but this memory block must be adjacent to ZONE_NORMAL. With this patch, we can easy to know a memory block can be onlined to which zone, and don't need to know the above two limits. Updated the related Documentation. [akpm@linux-foundation.org: use conventional comment layout] [akpm@linux-foundation.org: fix build with CONFIG_MEMORY_HOTREMOVE=n] [akpm@linux-foundation.org: remove unused local zone_prev] Signed-off-by: Zhang Zhen Cc: Dave Hansen Cc: David Rientjes Cc: Toshi Kani Cc: Yasuaki Ishimatsu Cc: Naoya Horiguchi Cc: Wang Nan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index a2e13e250bba..7c5d87191b28 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -373,6 +373,45 @@ static ssize_t show_phys_device(struct device *dev, return sprintf(buf, "%d\n", mem->phys_device); } +#ifdef CONFIG_MEMORY_HOTREMOVE +static ssize_t show_valid_zones(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct memory_block *mem = to_memory_block(dev); + unsigned long start_pfn, end_pfn; + unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; + struct page *first_page; + struct zone *zone; + + start_pfn = section_nr_to_pfn(mem->start_section_nr); + end_pfn = start_pfn + nr_pages; + first_page = pfn_to_page(start_pfn); + + /* The block contains more than one zone can not be offlined. */ + if (!test_pages_in_a_zone(start_pfn, end_pfn)) + return sprintf(buf, "none\n"); + + zone = page_zone(first_page); + + if (zone_idx(zone) == ZONE_MOVABLE - 1) { + /*The mem block is the last memoryblock of this zone.*/ + if (end_pfn == zone_end_pfn(zone)) + return sprintf(buf, "%s %s\n", + zone->name, (zone + 1)->name); + } + + if (zone_idx(zone) == ZONE_MOVABLE) { + /*The mem block is the first memoryblock of ZONE_MOVABLE.*/ + if (start_pfn == zone->zone_start_pfn) + return sprintf(buf, "%s %s\n", + zone->name, (zone - 1)->name); + } + + return sprintf(buf, "%s\n", zone->name); +} +static DEVICE_ATTR(valid_zones, 0444, show_valid_zones, NULL); +#endif + static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL); static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state); static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL); @@ -523,6 +562,9 @@ static struct attribute *memory_memblk_attrs[] = { &dev_attr_state.attr, &dev_attr_phys_device.attr, &dev_attr_removable.attr, +#ifdef CONFIG_MEMORY_HOTREMOVE + &dev_attr_valid_zones.attr, +#endif NULL }; -- cgit From 513510ddba9650fc7da456eefeb0ead7632324f6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 9 Oct 2014 15:26:40 -0700 Subject: common: dma-mapping: introduce common remapping functions For architectures without coherent DMA, memory for DMA may need to be remapped with coherent attributes. Factor out the the remapping code from arm and put it in a common location to reduce code duplication. As part of this, the arm APIs are now migrated away from ioremap_page_range to the common APIs which use map_vm_area for remapping. This should be an equivalent change and using map_vm_area is more correct as ioremap_page_range is intended to bring in io addresses into the cpu space and not regular kernel managed memory. Signed-off-by: Laura Abbott Reviewed-by: Catalin Marinas Cc: Arnd Bergmann Cc: David Riley Cc: Olof Johansson Cc: Ritesh Harjain Cc: Russell King Cc: Thierry Reding Cc: Will Deacon Cc: James Hogan Cc: Laura Abbott Cc: Mitchel Humpherys Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/dma-mapping.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'drivers') diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 6cd08e145bfa..9e8bbdd470ca 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include /* @@ -267,3 +269,73 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } EXPORT_SYMBOL(dma_common_mmap); + +#ifdef CONFIG_MMU +/* + * remaps an array of PAGE_SIZE pages into another vm_area + * Cannot be used in non-sleeping contexts + */ +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + + area = get_vm_area_caller(size, vm_flags, caller); + if (!area) + return NULL; + + area->pages = pages; + + if (map_vm_area(area, prot, pages)) { + vunmap(area->addr); + return NULL; + } + + return area->addr; +} + +/* + * remaps an allocated contiguous region into another vm_area. + * Cannot be used in non-sleeping contexts + */ + +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller) +{ + int i; + struct page **pages; + void *ptr; + unsigned long pfn; + + pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); + if (!pages) + return NULL; + + for (i = 0, pfn = page_to_pfn(page); i < (size >> PAGE_SHIFT); i++) + pages[i] = pfn_to_page(pfn + i); + + ptr = dma_common_pages_remap(pages, size, vm_flags, prot, caller); + + kfree(pages); + + return ptr; +} + +/* + * unmaps a range previously mapped by dma_common_*_remap + */ +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) +{ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (!area || (area->flags & vm_flags) != vm_flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); +} +#endif -- cgit From 1f13ae399c58af5a05b5cee61da864e1f4071de4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 9 Oct 2014 15:27:39 -0700 Subject: mm: remove noisy remainder of the scan_unevictable interface The deprecation warnings for the scan_unevictable interface triggers by scripts doing `sysctl -a | grep something else'. This is annoying and not helpful. The interface has been defunct since 264e56d8247e ("mm: disable user interface to manually rescue unevictable pages"), which was in 2011, and there haven't been any reports of usecases for it, only reports that the deprecation warnings are annying. It's unlikely that anybody is using this interface specifically at this point, so remove it. Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/base/node.c b/drivers/base/node.c index d51c49c9bafa..472168cd0c97 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -289,8 +289,6 @@ static int register_node(struct node *node, int num, struct node *parent) device_create_file(&node->dev, &dev_attr_distance); device_create_file(&node->dev, &dev_attr_vmstat); - scan_unevictable_register_node(node); - hugetlb_register_node(node); compaction_register_node(node); @@ -314,7 +312,6 @@ void unregister_node(struct node *node) device_remove_file(&node->dev, &dev_attr_distance); device_remove_file(&node->dev, &dev_attr_vmstat); - scan_unevictable_unregister_node(node); hugetlb_unregister_node(node); /* no-op, if memoryless node */ device_unregister(&node->dev); -- cgit From 22880ebe76be421a572b6f004604467c63f281f5 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Thu, 9 Oct 2014 15:29:07 -0700 Subject: drivers/firmware/memmap.c: don't create memmap sysfs of same firmware_map_entry By the following commits, we prevented from allocating firmware_map_entry of same memory range: f0093ede: drivers/firmware/memmap.c: don't allocate firmware_map_entry of same memory range 49c8b24d: drivers/firmware/memmap.c: pass the correct argument to firmware_map_find_entry_bootmem() But it's not enough. When PNP0C80 device is added by acpi_scan_init(), memmap sysfses of same firmware_map_entry are created twice as follows: # cat /sys/firmware/memmap/*/start 0x40000000000 0x60000000000 0x4a837000 0x4a83a000 0x4a8b5000 ... 0x40000000000 0x60000000000 ... The flows of the issues are as follows: 1. e820_reserve_resources() allocates firmware_map_entrys of all memory ranges defined in e820. And, these firmware_map_entrys are linked with map_entries list. map_entries -> entry 1 -> ... -> entry N 2. When PNP0C80 device is limited by mem= boot option, acpi_scan_init() added the memory device. In this case, firmware_map_add_hotplug() allocates firmware_map_entry and creates memmap sysfs. map_entries -> entry 1 -> ... -> entry N -> entry N+1 | memmap 1 3. firmware_memmap_init() creates memmap sysfses of firmware_map_entrys linked with map_entries. map_entries -> entry 1 -> ... -> entry N -> entry N+1 | | | memmap 2 memmap N+1 memmap 1 memmap N+2 So while hot removing the PNP0C80 device, kernel panic occurs as follows: BUG: unable to handle kernel paging request at 00000001003e000b IP: sysfs_open_file+0x46/0x2b0 PGD 203a89fe067 PUD 0 Oops: 0000 [#1] SMP ... Call Trace: do_dentry_open+0x1ef/0x2a0 finish_open+0x31/0x40 do_last+0x57c/0x1220 path_openat+0xc2/0x4c0 do_filp_open+0x4b/0xb0 do_sys_open+0xf3/0x1f0 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b The patch adds a check of confirming whether memmap sysfs of firmware_map_entry has been created, and does not create memmap sysfs of same firmware_map_entry. Signed-off-by: Yasuaki Ishimatsu Cc: Santosh Shilimkar Cc: Toshi Kani Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/memmap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 79f18e6d9c4f..cc016c615c19 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -184,6 +184,9 @@ static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry) static int map_entries_nr; static struct kset *mmap_kset; + if (entry->kobj.state_in_sysfs) + return -EEXIST; + if (!mmap_kset) { mmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj); if (!mmap_kset) -- cgit From d6d86c0a7f8ddc5b38cf089222cb1d9540762dc2 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 9 Oct 2014 15:29:27 -0700 Subject: mm/balloon_compaction: redesign ballooned pages management Sasha Levin reported KASAN splash inside isolate_migratepages_range(). Problem is in the function __is_movable_balloon_page() which tests AS_BALLOON_MAP in page->mapping->flags. This function has no protection against anonymous pages. As result it tried to check address space flags inside struct anon_vma. Further investigation shows more problems in current implementation: * Special branch in __unmap_and_move() never works: balloon_page_movable() checks page flags and page_count. In __unmap_and_move() page is locked, reference counter is elevated, thus balloon_page_movable() always fails. As a result execution goes to the normal migration path. virtballoon_migratepage() returns MIGRATEPAGE_BALLOON_SUCCESS instead of MIGRATEPAGE_SUCCESS, move_to_new_page() thinks this is an error code and assigns newpage->mapping to NULL. Newly migrated page lose connectivity with balloon an all ability for further migration. * lru_lock erroneously required in isolate_migratepages_range() for isolation ballooned page. This function releases lru_lock periodically, this makes migration mostly impossible for some pages. * balloon_page_dequeue have a tight race with balloon_page_isolate: balloon_page_isolate could be executed in parallel with dequeue between picking page from list and locking page_lock. Race is rare because they use trylock_page() for locking. This patch fixes all of them. Instead of fake mapping with special flag this patch uses special state of page->_mapcount: PAGE_BALLOON_MAPCOUNT_VALUE = -256. Buddy allocator uses PAGE_BUDDY_MAPCOUNT_VALUE = -128 for similar purpose. Storing mark directly in struct page makes everything safer and easier. PagePrivate is used to mark pages present in page list (i.e. not isolated, like PageLRU for normal pages). It replaces special rules for reference counter and makes balloon migration similar to migration of normal pages. This flag is protected by page_lock together with link to the balloon device. Signed-off-by: Konstantin Khlebnikov Reported-by: Sasha Levin Link: http://lkml.kernel.org/p/53E6CEAA.9020105@oracle.com Cc: Rafael Aquini Cc: Andrey Ryabinin Cc: [3.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_balloon.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 25ebe8eecdb7..c3eb93fc9261 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -163,8 +163,8 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num) /* Find pfns pointing at start of each page, get pages and free them. */ for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { struct page *page = balloon_pfn_to_page(pfns[i]); - balloon_page_free(page); adjust_managed_page_count(page, 1); + put_page(page); /* balloon reference */ } } @@ -395,6 +395,8 @@ static int virtballoon_migratepage(struct address_space *mapping, if (!mutex_trylock(&vb->balloon_lock)) return -EAGAIN; + get_page(newpage); /* balloon reference */ + /* balloon's page migration 1st step -- inflate "newpage" */ spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_insert(newpage, mapping, &vb_dev_info->pages); @@ -404,12 +406,7 @@ static int virtballoon_migratepage(struct address_space *mapping, set_page_pfns(vb->pfns, newpage); tell_host(vb, vb->inflate_vq); - /* - * balloon's page migration 2nd step -- deflate "page" - * - * It's safe to delete page->lru here because this page is at - * an isolated migration list, and this step is expected to happen here - */ + /* balloon's page migration 2nd step -- deflate "page" */ balloon_page_delete(page); vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; set_page_pfns(vb->pfns, page); @@ -417,7 +414,9 @@ static int virtballoon_migratepage(struct address_space *mapping, mutex_unlock(&vb->balloon_lock); - return MIGRATEPAGE_BALLOON_SUCCESS; + put_page(page); /* balloon reference */ + + return MIGRATEPAGE_SUCCESS; } /* define the balloon_mapping->a_ops callback to allow balloon page migration */ -- cgit From 9d1ba8056474a208ed9efb7e58cd014795d9f818 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 9 Oct 2014 15:29:29 -0700 Subject: mm/balloon_compaction: remove balloon mapping and flag AS_BALLOON_MAP Now ballooned pages are detected using PageBalloon(). Fake mapping is no longer required. This patch links ballooned pages to balloon device using field page->private instead of page->mapping. Also this patch embeds balloon_dev_info directly into struct virtio_balloon. Signed-off-by: Konstantin Khlebnikov Cc: Rafael Aquini Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_balloon.c | 60 +++++++++-------------------------------- 1 file changed, 13 insertions(+), 47 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index c3eb93fc9261..2bad7f9dd2ac 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -59,7 +59,7 @@ struct virtio_balloon * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE * to num_pages above. */ - struct balloon_dev_info *vb_dev_info; + struct balloon_dev_info vb_dev_info; /* Synchronize access/update to this struct virtio_balloon elements */ struct mutex balloon_lock; @@ -127,7 +127,7 @@ static void set_page_pfns(u32 pfns[], struct page *page) static void fill_balloon(struct virtio_balloon *vb, size_t num) { - struct balloon_dev_info *vb_dev_info = vb->vb_dev_info; + struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); @@ -171,7 +171,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num) static void leak_balloon(struct virtio_balloon *vb, size_t num) { struct page *page; - struct balloon_dev_info *vb_dev_info = vb->vb_dev_info; + struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); @@ -353,12 +353,11 @@ static int init_vqs(struct virtio_balloon *vb) return 0; } -static const struct address_space_operations virtio_balloon_aops; #ifdef CONFIG_BALLOON_COMPACTION /* * virtballoon_migratepage - perform the balloon page migration on behalf of * a compation thread. (called under page lock) - * @mapping: the page->mapping which will be assigned to the new migrated page. + * @vb_dev_info: the balloon device * @newpage: page that will replace the isolated page after migration finishes. * @page : the isolated (old) page that is about to be migrated to newpage. * @mode : compaction mode -- not used for balloon page migration. @@ -373,17 +372,13 @@ static const struct address_space_operations virtio_balloon_aops; * This function preforms the balloon page migration task. * Called through balloon_mapping->a_ops->migratepage */ -static int virtballoon_migratepage(struct address_space *mapping, +static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, struct page *newpage, struct page *page, enum migrate_mode mode) { - struct balloon_dev_info *vb_dev_info = balloon_page_device(page); - struct virtio_balloon *vb; + struct virtio_balloon *vb = container_of(vb_dev_info, + struct virtio_balloon, vb_dev_info); unsigned long flags; - BUG_ON(!vb_dev_info); - - vb = vb_dev_info->balloon_device; - /* * In order to avoid lock contention while migrating pages concurrently * to leak_balloon() or fill_balloon() we just give up the balloon_lock @@ -399,7 +394,7 @@ static int virtballoon_migratepage(struct address_space *mapping, /* balloon's page migration 1st step -- inflate "newpage" */ spin_lock_irqsave(&vb_dev_info->pages_lock, flags); - balloon_page_insert(newpage, mapping, &vb_dev_info->pages); + balloon_page_insert(vb_dev_info, newpage); vb_dev_info->isolated_pages--; spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; @@ -418,18 +413,11 @@ static int virtballoon_migratepage(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } - -/* define the balloon_mapping->a_ops callback to allow balloon page migration */ -static const struct address_space_operations virtio_balloon_aops = { - .migratepage = virtballoon_migratepage, -}; #endif /* CONFIG_BALLOON_COMPACTION */ static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; - struct address_space *vb_mapping; - struct balloon_dev_info *vb_devinfo; int err; vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); @@ -445,30 +433,14 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vdev = vdev; vb->need_stats_update = 0; - vb_devinfo = balloon_devinfo_alloc(vb); - if (IS_ERR(vb_devinfo)) { - err = PTR_ERR(vb_devinfo); - goto out_free_vb; - } - - vb_mapping = balloon_mapping_alloc(vb_devinfo, - (balloon_compaction_check()) ? - &virtio_balloon_aops : NULL); - if (IS_ERR(vb_mapping)) { - /* - * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP - * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off. - */ - err = PTR_ERR(vb_mapping); - if (err != -EOPNOTSUPP) - goto out_free_vb_devinfo; - } - - vb->vb_dev_info = vb_devinfo; + balloon_devinfo_init(&vb->vb_dev_info); +#ifdef CONFIG_BALLOON_COMPACTION + vb->vb_dev_info.migratepage = virtballoon_migratepage; +#endif err = init_vqs(vb); if (err) - goto out_free_vb_mapping; + goto out_free_vb; vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { @@ -480,10 +452,6 @@ static int virtballoon_probe(struct virtio_device *vdev) out_del_vqs: vdev->config->del_vqs(vdev); -out_free_vb_mapping: - balloon_mapping_free(vb_mapping); -out_free_vb_devinfo: - balloon_devinfo_free(vb_devinfo); out_free_vb: kfree(vb); out: @@ -509,8 +477,6 @@ static void virtballoon_remove(struct virtio_device *vdev) kthread_stop(vb->thread); remove_common(vb); - balloon_mapping_free(vb->vb_dev_info->mapping); - balloon_devinfo_free(vb->vb_dev_info); kfree(vb); } -- cgit From 09316c09dde33aae14f34489d9e3d243ec0d5938 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 9 Oct 2014 15:29:32 -0700 Subject: mm/balloon_compaction: add vmstat counters and kpageflags bit Always mark pages with PageBalloon even if balloon compaction is disabled and expose this mark in /proc/kpageflags as KPF_BALLOON. Also this patch adds three counters into /proc/vmstat: "balloon_inflate", "balloon_deflate" and "balloon_migrate". They accumulate balloon activity. Current size of balloon is (balloon_inflate - balloon_deflate) pages. All generic balloon code now gathered under option CONFIG_MEMORY_BALLOON. It should be selected by ballooning driver which wants use this feature. Currently virtio-balloon is the only user. Signed-off-by: Konstantin Khlebnikov Cc: Rafael Aquini Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/Kconfig | 1 + drivers/virtio/virtio_balloon.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index c6683f2e396c..00b228638274 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -25,6 +25,7 @@ config VIRTIO_PCI config VIRTIO_BALLOON tristate "Virtio balloon driver" depends on VIRTIO + select MEMORY_BALLOON ---help--- This driver supports increasing and decreasing the amount of memory within a KVM guest. diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 2bad7f9dd2ac..f893148a107b 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -396,6 +396,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_insert(vb_dev_info, newpage); vb_dev_info->isolated_pages--; + __count_vm_event(BALLOON_MIGRATE); spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; set_page_pfns(vb->pfns, newpage); -- cgit From f0d6d1f6ff6f8525cfa396ec1969b8f402391445 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 9 Oct 2014 15:29:41 -0700 Subject: CMA: document cma=0 It isn't obvious that CMA can be disabled on the kernel's command line, so document it. Signed-off-by: Jean Delvare Cc: Joonsoo Kim Cc: Greg Kroah-Hartman Cc: Akinobu Mita Cc: Chuck Ebbert Cc: Marek Szyprowski Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 134f763d90fd..61a33f4ba608 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -252,6 +252,9 @@ config DMA_CMA to allocate big physically-contiguous blocks of memory for use with hardware components that do not support I/O map nor scatter-gather. + You can disable CMA by specifying "cma=0" on the kernel's command + line. + For more information see . If unsure, say "n". -- cgit From 722cdc17232f0f684011407f7cf3c40d39457971 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:50 -0700 Subject: zsmalloc: change return value unit of zs_get_total_size_bytes zs_get_total_size_bytes returns a amount of memory zsmalloc consumed with *byte unit* but zsmalloc operates *page unit* rather than byte unit so let's change the API so benefit we could get is that reduce unnecessary overhead (ie, change page unit with byte unit) in zsmalloc. Since return type is pages, "zs_get_total_pages" is better than "zs_get_total_size_bytes". Signed-off-by: Minchan Kim Reviewed-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Cc: David Horner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d00831c3d731..f0b8b30a7128 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev, down_read(&zram->init_lock); if (init_done(zram)) - val = zs_get_total_size_bytes(meta->mem_pool); + val = zs_get_total_pages(meta->mem_pool); up_read(&zram->init_lock); - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); } static ssize_t max_comp_streams_show(struct device *dev, -- cgit From 9ada9da9573f3460b156b7755c093e30b258eacb Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:53 -0700 Subject: zram: zram memory size limitation Since zram has no control feature to limit memory usage, it makes hard to manage system memrory. This patch adds new knob "mem_limit" via sysfs to set up the a limit so that zram could fail allocation once it reaches the limit. In addition, user could change the limit in runtime so that he could manage the memory more dynamically. Initial state is no limit so it doesn't break old behavior. [akpm@linux-foundation.org: fix typo, per Sergey] Signed-off-by: Minchan Kim Cc: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Cc: David Horner Cc: Joonsoo Kim Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 45 +++++++++++++++++++++++++++++++++++++++++++ drivers/block/zram/zram_drv.h | 5 +++++ 2 files changed, 50 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f0b8b30a7128..64b27cf9a583 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -122,6 +122,37 @@ static ssize_t max_comp_streams_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%d\n", val); } +static ssize_t mem_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->limit_pages; + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); +} + +static ssize_t mem_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + u64 limit; + char *tmp; + struct zram *zram = dev_to_zram(dev); + + limit = memparse(buf, &tmp); + if (buf == tmp) /* no chars parsed, invalid input */ + return -EINVAL; + + down_write(&zram->init_lock); + zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; + up_write(&zram->init_lock); + + return len; +} + static ssize_t max_comp_streams_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -513,6 +544,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, ret = -ENOMEM; goto out; } + + if (zram->limit_pages && + zs_get_total_pages(meta->mem_pool) > zram->limit_pages) { + zs_free(meta->mem_pool, handle); + ret = -ENOMEM; + goto out; + } + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { @@ -617,6 +656,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) struct zram_meta *meta; down_write(&zram->init_lock); + + zram->limit_pages = 0; + if (!init_done(zram)) { up_write(&zram->init_lock); return; @@ -857,6 +899,8 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show, + mem_limit_store); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, @@ -885,6 +929,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_orig_data_size.attr, &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, + &dev_attr_mem_limit.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, NULL, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e0f725c87cc6..b7aa9c21553f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -112,6 +112,11 @@ struct zram { u64 disksize; /* bytes */ int max_comp_streams; struct zram_stats stats; + /* + * the number of pages zram can consume for storing compressed data + */ + unsigned long limit_pages; + char compressor[10]; }; #endif -- cgit From 461a8eee6af3b55745be64bea403ed0b743563cf Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:55 -0700 Subject: zram: report maximum used memory Normally, zram user could get maximum memory usage zram consumed via polling mem_used_total with sysfs in userspace. But it has a critical problem because user can miss peak memory usage during update inverval of polling. For avoiding that, user should poll it with shorter interval(ie, 0.0000000001s) with mlocking to avoid page fault delay when memory pressure is heavy. It would be troublesome. This patch adds new knob "mem_used_max" so user could see the maximum memory usage easily via reading the knob and reset it via "echo 0 > /sys/block/zram0/mem_used_max". Signed-off-by: Minchan Kim Reviewed-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Reviewed-by: David Horner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 60 +++++++++++++++++++++++++++++++++++++++++-- drivers/block/zram/zram_drv.h | 1 + 2 files changed, 59 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 64b27cf9a583..d78b245bae06 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -153,6 +153,41 @@ static ssize_t mem_limit_store(struct device *dev, return len; } +static ssize_t mem_used_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + if (init_done(zram)) + val = atomic_long_read(&zram->stats.max_used_pages); + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); +} + +static ssize_t mem_used_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int err; + unsigned long val; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; + + err = kstrtoul(buf, 10, &val); + if (err || val != 0) + return -EINVAL; + + down_read(&zram->init_lock); + if (init_done(zram)) + atomic_long_set(&zram->stats.max_used_pages, + zs_get_total_pages(meta->mem_pool)); + up_read(&zram->init_lock); + + return len; +} + static ssize_t max_comp_streams_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -465,6 +500,21 @@ out_cleanup: return ret; } +static inline void update_used_max(struct zram *zram, + const unsigned long pages) +{ + int old_max, cur_max; + + old_max = atomic_long_read(&zram->stats.max_used_pages); + + do { + cur_max = old_max; + if (pages > cur_max) + old_max = atomic_long_cmpxchg( + &zram->stats.max_used_pages, cur_max, pages); + } while (old_max != cur_max); +} + static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset) { @@ -476,6 +526,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct zram_meta *meta = zram->meta; struct zcomp_strm *zstrm; bool locked = false; + unsigned long alloced_pages; page = bvec->bv_page; if (is_partial_io(bvec)) { @@ -545,13 +596,15 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - if (zram->limit_pages && - zs_get_total_pages(meta->mem_pool) > zram->limit_pages) { + alloced_pages = zs_get_total_pages(meta->mem_pool); + if (zram->limit_pages && alloced_pages > zram->limit_pages) { zs_free(meta->mem_pool, handle); ret = -ENOMEM; goto out; } + update_used_max(zram, alloced_pages); + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { @@ -901,6 +954,8 @@ static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show, mem_limit_store); +static DEVICE_ATTR(mem_used_max, S_IRUGO | S_IWUSR, mem_used_max_show, + mem_used_max_store); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, @@ -930,6 +985,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, &dev_attr_mem_limit.attr, + &dev_attr_mem_used_max.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, NULL, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index b7aa9c21553f..c6ee271317f5 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -90,6 +90,7 @@ struct zram_stats { atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t zero_pages; /* no. of zero filled pages */ atomic64_t pages_stored; /* no. of pages currently stored */ + atomic_long_t max_used_pages; /* no. of maximum pages stored */ }; struct zram_meta { -- cgit From 015254daf1753003c19c46b90ee85a963260d270 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 9 Oct 2014 15:29:57 -0700 Subject: zram: use notify_free to account all free notifications `notify_free' device attribute accounts the number of slot free notifications and internally represents the number of zram_free_page() calls. Slot free notifications are sent only when device is used as a swap device, hence `notify_free' is used only for swap devices. Since f4659d8e620d08 (zram: support REQ_DISCARD) ZRAM handles yet another one free notification (also via zram_free_page() call) -- REQ_DISCARD requests, which are sent by a filesystem, whenever some data blocks are discarded. However, there is no way to know the number of notifications in the latter case. Use `notify_free' to account the number of pages freed by zram_bio_discard() and zram_slot_free_notify(). Depending on usage scenario `notify_free' represents: a) the number of pages freed because of slot free notifications, which is equal to the number of swap_slot_free_notify() calls, so there is no behaviour change b) the number of pages freed because of REQ_DISCARD notifications Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Cc: Chao Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d78b245bae06..3b850164c65c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -698,6 +698,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + atomic64_inc(&zram->stats.notify_free); index++; n -= PAGE_SIZE; } -- cgit