| author    | David S. Miller <davem@davemloft.net>         | 2010-09-08 23:49:04 -0700 |
| committer | David S. Miller <davem@davemloft.net>         | 2010-09-08 23:49:04 -0700 |
| commit    | e199e6136ce6b151e6638ae93dca60748424d900      |                           |
| tree      | 0d66e0b5d227c36b005e4f5537f4bbcfc6ed4904 /lib |                           |
| parent    | 972c40b5bee429c84ba727f8ac0a08292bc5dc3d      |                           |
| parent    | d56557af19867edb8c0e96f8e26399698a08857f      |                           |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/Kconfig | 3 |
| -rw-r--r-- | lib/Kconfig.debug | 73 |
| -rw-r--r-- | lib/Makefile | 1 |
| -rw-r--r-- | lib/bug.c | 12 |
| -rw-r--r-- | lib/decompress_bunzip2.c | 10 |
| -rw-r--r-- | lib/flex_array.c | 25 |
| -rw-r--r-- | lib/inflate.c | 2 |
| -rw-r--r-- | lib/iommu-helper.c | 9 |
| -rw-r--r-- | lib/ioremap.c | 10 |
| -rw-r--r-- | lib/kobject_uevent.c | 4 |
| -rw-r--r-- | lib/list_debug.c | 6 |
| -rw-r--r-- | lib/percpu_counter.c | 27 |
| -rw-r--r-- | lib/radix-tree.c | 134 |
| -rw-r--r-- | lib/raid6/.gitignore | 4 |
| -rw-r--r-- | lib/raid6/Makefile | 75 |
| -rw-r--r-- | lib/raid6/algos.c | 154 |
| -rw-r--r-- | lib/raid6/altivec.uc | 130 |
| -rw-r--r-- | lib/raid6/int.uc | 117 |
| -rw-r--r-- | lib/raid6/mktables.c | 132 |
| -rw-r--r-- | lib/raid6/mmx.c | 142 |
| -rw-r--r-- | lib/raid6/recov.c | 132 |
| -rw-r--r-- | lib/raid6/sse1.c | 162 |
| -rw-r--r-- | lib/raid6/sse2.c | 262 |
| -rw-r--r-- | lib/raid6/test/Makefile | 72 |
| -rw-r--r-- | lib/raid6/test/test.c | 124 |
| -rw-r--r-- | lib/raid6/unroll.awk | 20 |
| -rw-r--r-- | lib/raid6/x86.h | 61 |
| -rw-r--r-- | lib/rwsem.c | 150 |
| -rw-r--r-- | lib/scatterlist.c | 23 |
| -rw-r--r-- | lib/vsprintf.c | 14 |
30 files changed, 1967 insertions, 123 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 5b916bc0fbae..fa9bf2c06199 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -7,6 +7,9 @@ config BINARY_PRINTF  menu "Library routines" +config RAID6_PQ +	tristate +  config BITREVERSE  	tristate diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dfdc0347b05d..1b4afd2e6ca0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -76,7 +76,6 @@ config UNUSED_SYMBOLS  config DEBUG_FS  	bool "Debug Filesystem" -	depends on SYSFS  	help  	  debugfs is a virtual file system that kernel developers use to put  	  debugging files into.  Enable this option to be able to read and @@ -152,28 +151,33 @@ config DEBUG_SHIRQ  	  Drivers ought to be able to handle interrupts coming in at those  	  points; some don't and need to be caught. -config DETECT_SOFTLOCKUP -	bool "Detect Soft Lockups" +config LOCKUP_DETECTOR +	bool "Detect Hard and Soft Lockups"  	depends on DEBUG_KERNEL && !S390 -	default y  	help -	  Say Y here to enable the kernel to detect "soft lockups", -	  which are bugs that cause the kernel to loop in kernel +	  Say Y here to enable the kernel to act as a watchdog to detect +	  hard and soft lockups. + +	  Softlockups are bugs that cause the kernel to loop in kernel  	  mode for more than 60 seconds, without giving other tasks a -	  chance to run. +	  chance to run.  The current stack trace is displayed upon +	  detection and the system will stay locked up. -	  When a soft-lockup is detected, the kernel will print the -	  current stack trace (which you should report), but the -	  system will stay locked up. This feature has negligible -	  overhead. +	  Hardlockups are bugs that cause the CPU to loop in kernel mode +	  for more than 60 seconds, without letting other interrupts have a +	  chance to run.  The current stack trace is displayed upon detection +	  and the system will stay locked up. -	  (Note that "hard lockups" are separate type of bugs that -	   can be detected via the NMI-watchdog, on platforms that -	   support it.) +	  The overhead should be minimal.  A periodic hrtimer runs to +	  generate interrupts and kick the watchdog task every 10-12 seconds. +	  An NMI is generated every 60 seconds or so to check for hardlockups. + +config HARDLOCKUP_DETECTOR +	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI  config BOOTPARAM_SOFTLOCKUP_PANIC  	bool "Panic (Reboot) On Soft Lockups" -	depends on DETECT_SOFTLOCKUP +	depends on LOCKUP_DETECTOR  	help  	  Say Y here to enable the kernel to panic on "soft lockups",  	  which are bugs that cause the kernel to loop in kernel @@ -190,7 +194,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC  config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE  	int -	depends on DETECT_SOFTLOCKUP +	depends on LOCKUP_DETECTOR  	range 0 1  	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC  	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -307,6 +311,12 @@ config DEBUG_OBJECTS_WORK  	  work queue routines to track the life time of work objects and  	  validate the work operations. +config DEBUG_OBJECTS_RCU_HEAD +	bool "Debug RCU callbacks objects" +	depends on DEBUG_OBJECTS && PREEMPT +	help +	  Enable this to turn on debugging of RCU list heads (call_rcu() usage). +  config DEBUG_OBJECTS_ENABLE_DEFAULT  	int "debug_objects bootup default value (0-1)"          range 0 1 @@ -400,6 +410,13 @@ config DEBUG_KMEMLEAK_TEST  	  If unsure, say N. +config DEBUG_KMEMLEAK_DEFAULT_OFF +	bool "Default kmemleak to off" +	depends on DEBUG_KMEMLEAK +	help +	  Say Y here to disable kmemleak by default. 
It can then be enabled +	  on the command line via kmemleak=on. +  config DEBUG_PREEMPT  	bool "Debug preemptible kernel"  	depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT @@ -528,7 +545,7 @@ config LOCKDEP  	bool  	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT  	select STACKTRACE -	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 +	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE  	select KALLSYMS  	select KALLSYMS_ALL @@ -628,6 +645,19 @@ config DEBUG_INFO  	  If unsure, say N. +config DEBUG_INFO_REDUCED +	bool "Reduce debugging information" +	depends on DEBUG_INFO +	help +	  If you say Y here gcc is instructed to generate less debugging +	  information for structure types. This means that tools that +	  need full debugging information (like kgdb or systemtap) won't +	  be happy. But if you merely need debugging information to +	  resolve line numbers there is no loss. Advantage is that +	  build directory object sizes shrink dramatically over a full +	  DEBUG_INFO build and compile times are reduced too. +	  Only works with newer gcc versions. +  config DEBUG_VM  	bool "Debug VM"  	depends on DEBUG_KERNEL @@ -958,19 +988,22 @@ config FAULT_INJECTION_STACKTRACE_FILTER  	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT  	depends on !X86_64  	select STACKTRACE -	select FRAME_POINTER if !PPC && !S390 +	select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE  	help  	  Provide stacktrace filter for fault-injection capabilities  config LATENCYTOP  	bool "Latency measuring infrastructure" -	select FRAME_POINTER if !MIPS && !PPC && !S390 +	depends on HAVE_LATENCYTOP_SUPPORT +	depends on DEBUG_KERNEL +	depends on STACKTRACE_SUPPORT +	depends on PROC_FS +	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE  	select KALLSYMS  	select KALLSYMS_ALL  	select STACKTRACE  	select SCHEDSTATS  	select SCHED_DEBUG -	depends on HAVE_LATENCYTOP_SUPPORT  	help  	  Enable this option if you want to use the LatencyTOP tool  	  to find out which userspace is blocking on what kernel operations. 
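[Editor's note] The new CONFIG_DEBUG_OBJECTS_RCU_HEAD entry above instruments call_rcu() usage through debugobjects. The following is a minimal sketch of the pattern being tracked; it is not part of this merge, and "struct foo" and its helpers are invented names for illustration only.

/* Illustrative only -- not from this diff. Deferred freeing via call_rcu();
 * with DEBUG_OBJECTS_RCU_HEAD enabled, the embedded rcu_head is tracked
 * as a debug object. */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct foo {
	int data;
	struct rcu_head rcu;	/* becomes a tracked debug object */
};

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_release(struct foo *f)
{
	/* Actual freeing is deferred until an RCU grace period elapses. */
	call_rcu(&f->rcu, foo_free_rcu);
}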
diff --git a/lib/Makefile b/lib/Makefile index 0bfabba1bb32..e6a3763b8212 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/  obj-$(CONFIG_REED_SOLOMON) += reed_solomon/  obj-$(CONFIG_LZO_COMPRESS) += lzo/  obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_RAID6_PQ) += raid6/  lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o  lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o diff --git a/lib/bug.c b/lib/bug.c index f13daf435211..7cdfad88128f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -136,8 +136,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)  	bug = find_bug(bugaddr); -	printk(KERN_EMERG "------------[ cut here ]------------\n"); -  	file = NULL;  	line = 0;  	warning = 0; @@ -156,19 +154,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)  	if (warning) {  		/* this is a WARN_ON rather than BUG/BUG_ON */ +		printk(KERN_WARNING "------------[ cut here ]------------\n"); +  		if (file) -			printk(KERN_ERR "Badness at %s:%u\n", +			printk(KERN_WARNING "WARNING: at %s:%u\n",  			       file, line);  		else -			printk(KERN_ERR "Badness at %p " +			printk(KERN_WARNING "WARNING: at %p "  			       "[verbose debug info unavailable]\n",  			       (void *)bugaddr); +		print_modules();  		show_regs(regs); +		print_oops_end_marker();  		add_taint(BUG_GET_TAINT(bug));  		return BUG_TRAP_TYPE_WARN;  	} +	printk(KERN_EMERG "------------[ cut here ]------------\n"); +  	if (file)  		printk(KERN_CRIT "kernel BUG at %s:%u!\n",  		       file, line); diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index a4e971dee102..81c8bb1cc6aa 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -107,6 +107,8 @@ struct bunzip_data {  	unsigned char selectors[32768];		/* nSelectors = 15 bits */  	struct group_data groups[MAX_GROUPS];	/* Huffman coding tables */  	int io_error;			/* non-zero if we have IO error */ +	int byteCount[256]; +	unsigned char symToByte[256], mtfSymbol[256];  }; @@ -158,14 +160,16 @@ static int INIT get_next_block(struct bunzip_data *bd)  	int *base = NULL;  	int *limit = NULL;  	int dbufCount, nextSym, dbufSize, groupCount, selector, -		i, j, k, t, runPos, symCount, symTotal, nSelectors, -		byteCount[256]; -	unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; +		i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount; +	unsigned char uc, *symToByte, *mtfSymbol, *selectors;  	unsigned int *dbuf, origPtr;  	dbuf = bd->dbuf;  	dbufSize = bd->dbufSize;  	selectors = bd->selectors; +	byteCount = bd->byteCount; +	symToByte = bd->symToByte; +	mtfSymbol = bd->mtfSymbol;  	/* Read in header signature and CRC, then validate signature.  	   (last block signature means CRC is for whole file, return now) */ diff --git a/lib/flex_array.c b/lib/flex_array.c index 41b1804fa728..77a6fea7481e 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -171,6 +171,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)   * Note that this *copies* the contents of @src into   * the array.  If you are trying to store an array of   * pointers, make sure to pass in &ptr instead of ptr. + * You may instead wish to use the flex_array_put_ptr() + * helper function.   *   * Locking must be provided by the caller.   */ @@ -265,7 +267,8 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,   *   * Returns a pointer to the data at index @element_nr.  Note   * that this is a copy of the data that was passed in.  
If you - * are using this to store pointers, you'll get back &ptr. + * are using this to store pointers, you'll get back &ptr.  You + * may instead wish to use the flex_array_get_ptr helper.   *   * Locking must be provided by the caller.   */ @@ -286,6 +289,26 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)  	return &part->elements[index_inside_part(fa, element_nr)];  } +/** + * flex_array_get_ptr - pull a ptr back out of the array + * @fa:		the flex array from which to extract data + * @element_nr:	index of the element to fetch from the array + * + * Returns the pointer placed in the flex array at element_nr using + * flex_array_put_ptr().  This function should not be called if the + * element in question was not set using the _put_ptr() helper. + */ +void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) +{ +	void **tmp; + +	tmp = flex_array_get(fa, element_nr); +	if (!tmp) +		return NULL; + +	return *tmp; +} +  static int part_is_free(struct flex_array_part *part)  {  	int i; diff --git a/lib/inflate.c b/lib/inflate.c index 677b738c2204..013a76193481 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -103,7 +103,9 @@        the two sets of lengths.   */  #include <linux/compiler.h> +#ifdef NO_INFLATE_MALLOC  #include <linux/slab.h> +#endif  #ifdef RCSID  static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #"; diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index c0251f4ad08b..da053313ee5c 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -38,12 +38,3 @@ again:  	return -1;  }  EXPORT_SYMBOL(iommu_area_alloc); - -unsigned long iommu_num_pages(unsigned long addr, unsigned long len, -			      unsigned long io_page_size) -{ -	unsigned long size = (addr & (io_page_size - 1)) + len; - -	return DIV_ROUND_UP(size, io_page_size); -} -EXPORT_SYMBOL(iommu_num_pages); diff --git a/lib/ioremap.c b/lib/ioremap.c index 14c6078f17a2..5730ecd3eb66 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,10 +13,10 @@  #include <asm/pgtable.h>  static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, -		unsigned long end, unsigned long phys_addr, pgprot_t prot) +		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)  {  	pte_t *pte; -	unsigned long pfn; +	u64 pfn;  	pfn = phys_addr >> PAGE_SHIFT;  	pte = pte_alloc_kernel(pmd, addr); @@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,  }  static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, -		unsigned long end, unsigned long phys_addr, pgprot_t prot) +		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)  {  	pmd_t *pmd;  	unsigned long next; @@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,  }  static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, -		unsigned long end, unsigned long phys_addr, pgprot_t prot) +		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)  {  	pud_t *pud;  	unsigned long next; @@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,  }  int ioremap_page_range(unsigned long addr, -		       unsigned long end, unsigned long phys_addr, pgprot_t prot) +		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)  {  	pgd_t *pgd;  	unsigned long start; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index b93579504dfa..70af0a7f97c0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -123,7 +123,7 @@ static int kobj_usermode_filter(struct kobject *kobj)   * @kobj: struct kobject that the action is 
happening to   * @envp_ext: pointer to environmental data   * - * Returns 0 if kobject_uevent() is completed with success or the + * Returns 0 if kobject_uevent_env() is completed with success or the   * corresponding error when it fails.   */  int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, @@ -317,7 +317,7 @@ exit:  EXPORT_SYMBOL_GPL(kobject_uevent_env);  /** - * kobject_uevent - notify userspace by ending an uevent + * kobject_uevent - notify userspace by sending an uevent   *   * @action: action that is happening   * @kobj: struct kobject that the action is happening to diff --git a/lib/list_debug.c b/lib/list_debug.c index 1a39f4e3ae1f..344c710d16ca 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add);   */  void list_del(struct list_head *entry)  { +	WARN(entry->next == LIST_POISON1, +		"list_del corruption, next is LIST_POISON1 (%p)\n", +		LIST_POISON1); +	WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2, +		"list_del corruption, prev is LIST_POISON2 (%p)\n", +		LIST_POISON2);  	WARN(entry->prev->next != entry,  		"list_del corruption. prev->next should be %p, "  		"but was %p\n", entry, entry->prev->next); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index aeaa6d734447..ec9048e74f44 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -137,6 +137,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,  	return NOTIFY_OK;  } +/* + * Compare counter against given value. + * Return 1 if greater, 0 if equal and -1 if less + */ +int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ +	s64	count; + +	count = percpu_counter_read(fbc); +	/* Check to see if rough count will be sufficient for comparison */ +	if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { +		if (count > rhs) +			return 1; +		else +			return -1; +	} +	/* Need to use precise count */ +	count = percpu_counter_sum(fbc); +	if (count > rhs) +		return 1; +	else if (count < rhs) +		return -1; +	else +		return 0; +} +EXPORT_SYMBOL(percpu_counter_compare); +  static int __init percpu_counter_startup(void)  {  	compute_batch_value(); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 05da38bcc298..efd16fa80b1c 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -174,14 +174,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)  {  	struct radix_tree_node *node =  			container_of(head, struct radix_tree_node, rcu_head); +	int i;  	/*  	 * must only free zeroed nodes into the slab. radix_tree_shrink  	 * can leave us with a non-NULL entry in the first slot, so clear  	 * that here to make sure.  	 */ -	tag_clear(node, 0, 0); -	tag_clear(node, 1, 0); +	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) +		tag_clear(node, i, 0); +  	node->slots[0] = NULL;  	node->count = 0; @@ -609,6 +611,134 @@ int radix_tree_tag_get(struct radix_tree_root *root,  EXPORT_SYMBOL(radix_tree_tag_get);  /** + * radix_tree_range_tag_if_tagged - for each item in given range set given + *				   tag if item has another tag set + * @root:		radix tree root + * @first_indexp:	pointer to a starting index of a range to scan + * @last_index:		last index of a range to scan + * @nr_to_tag:		maximum number items to tag + * @iftag:		tag index to test + * @settag:		tag index to set if tested tag is set + * + * This function scans range of radix tree from first_index to last_index + * (inclusive).  For each item in the range if iftag is set, the function sets + * also settag. 
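[Editor's note] The percpu_counter_compare() helper added above only falls back to an exact percpu_counter_sum() when the approximate count is within batch*num_online_cpus() of the comparison value. A hedged usage sketch follows; the counter, threshold, and error handling are invented for illustration and are not taken from this merge.

/* Illustrative only: reserve one unit if at least 'needed' units remain.
 * percpu_counter_compare() keeps the common case cheap and only performs
 * the expensive exact sum when the rough count is near the threshold. */
#include <linux/percpu_counter.h>
#include <linux/errno.h>

static int reserve_unit(struct percpu_counter *free_units, s64 needed)
{
	if (percpu_counter_compare(free_units, needed) < 0)
		return -ENOSPC;

	/* Note: the check and the update are not atomic together; a real
	 * caller would tolerate (or serialize against) that race. */
	percpu_counter_sub(free_units, 1);
	return 0;
}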
The function stops either after tagging nr_to_tag items or + * after reaching last_index. + * + * The tags must be set from the leaf level only and propagated back up the + * path to the root. We must do this so that we resolve the full path before + * setting any tags on intermediate nodes. If we set tags as we descend, then + * we can get to the leaf node and find that the index that has the iftag + * set is outside the range we are scanning. This reults in dangling tags and + * can lead to problems with later tag operations (e.g. livelocks on lookups). + * + * The function returns number of leaves where the tag was set and sets + * *first_indexp to the first unscanned index. + * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must + * be prepared to handle that. + */ +unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, +		unsigned long *first_indexp, unsigned long last_index, +		unsigned long nr_to_tag, +		unsigned int iftag, unsigned int settag) +{ +	unsigned int height = root->height; +	struct radix_tree_path path[height]; +	struct radix_tree_path *pathp = path; +	struct radix_tree_node *slot; +	unsigned int shift; +	unsigned long tagged = 0; +	unsigned long index = *first_indexp; + +	last_index = min(last_index, radix_tree_maxindex(height)); +	if (index > last_index) +		return 0; +	if (!nr_to_tag) +		return 0; +	if (!root_tag_get(root, iftag)) { +		*first_indexp = last_index + 1; +		return 0; +	} +	if (height == 0) { +		*first_indexp = last_index + 1; +		root_tag_set(root, settag); +		return 1; +	} + +	shift = (height - 1) * RADIX_TREE_MAP_SHIFT; +	slot = radix_tree_indirect_to_ptr(root->rnode); + +	/* +	 * we fill the path from (root->height - 2) to 0, leaving the index at +	 * (root->height - 1) as a terminator. Zero the node in the terminator +	 * so that we can use this to end walk loops back up the path. +	 */ +	path[height - 1].node = NULL; + +	for (;;) { +		int offset; + +		offset = (index >> shift) & RADIX_TREE_MAP_MASK; +		if (!slot->slots[offset]) +			goto next; +		if (!tag_get(slot, iftag, offset)) +			goto next; +		if (height > 1) { +			/* Go down one level */ +			height--; +			shift -= RADIX_TREE_MAP_SHIFT; +			path[height - 1].node = slot; +			path[height - 1].offset = offset; +			slot = slot->slots[offset]; +			continue; +		} + +		/* tag the leaf */ +		tagged++; +		tag_set(slot, settag, offset); + +		/* walk back up the path tagging interior nodes */ +		pathp = &path[0]; +		while (pathp->node) { +			/* stop if we find a node with the tag already set */ +			if (tag_get(pathp->node, settag, pathp->offset)) +				break; +			tag_set(pathp->node, settag, pathp->offset); +			pathp++; +		} + +next: +		/* Go to next item at level determined by 'shift' */ +		index = ((index >> shift) + 1) << shift; +		/* Overflow can happen when last_index is ~0UL... */ +		if (index > last_index || !index) +			break; +		if (tagged >= nr_to_tag) +			break; +		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { +			/* +			 * We've fully scanned this node. Go up. Because +			 * last_index is guaranteed to be in the tree, what +			 * we do below cannot wander astray. 
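[Editor's note] radix_tree_range_tag_if_tagged() is meant to be called in bounded batches, with the caller dropping its lock between calls and resuming from the updated *first_indexp. A sketch of that calling pattern follows; the tag indices, batch size, and lock are placeholders, not taken from this diff.

/* Illustrative only: mark every DIRTY-tagged item in [start, end] with
 * TOWRITE, one batch at a time, without holding the lock throughout. */
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/sched.h>

#define EX_TAG_DIRTY	0	/* placeholder tag indices */
#define EX_TAG_TOWRITE	1
#define EX_BATCH	4096UL

static void retag_range(struct radix_tree_root *root, spinlock_t *lock,
			unsigned long start, unsigned long end)
{
	unsigned long tagged;

	do {
		spin_lock_irq(lock);
		tagged = radix_tree_range_tag_if_tagged(root, &start, end,
							EX_BATCH,
							EX_TAG_DIRTY,
							EX_TAG_TOWRITE);
		spin_unlock_irq(lock);
		cond_resched();
		/* 'start' now holds the first unscanned index; it can wrap
		 * to 0 when end == ULONG_MAX, which also ends the loop. */
	} while (tagged >= EX_BATCH && start);
}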
+			 */ +			slot = path[height - 1].node; +			height++; +			shift += RADIX_TREE_MAP_SHIFT; +		} +	} +	/* +	 * The iftag must have been set somewhere because otherwise +	 * we would return immediated at the beginning of the function +	 */ +	root_tag_set(root, settag); +	*first_indexp = index; + +	return tagged; +} +EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); + + +/**   *	radix_tree_next_hole    -    find the next hole (not-present entry)   *	@root:		tree root   *	@index:		index key diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore new file mode 100644 index 000000000000..162becacf97c --- /dev/null +++ b/lib/raid6/.gitignore @@ -0,0 +1,4 @@ +mktables +altivec*.c +int*.c +tables.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile new file mode 100644 index 000000000000..8a38102770f3 --- /dev/null +++ b/lib/raid6/Makefile @@ -0,0 +1,75 @@ +obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o + +raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \ +		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ +		   altivec8.o mmx.o sse1.o sse2.o +hostprogs-y	+= mktables + +quiet_cmd_unroll = UNROLL  $@ +      cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ +                   < $< > $@ || ( rm -f $@ && exit 1 ) + +ifeq ($(CONFIG_ALTIVEC),y) +altivec_flags := -maltivec -mabi=altivec +endif + +targets += int1.c +$(obj)/int1.c:   UNROLL := 1 +$(obj)/int1.c:   $(src)/int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += int2.c +$(obj)/int2.c:   UNROLL := 2 +$(obj)/int2.c:   $(src)/int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += int4.c +$(obj)/int4.c:   UNROLL := 4 +$(obj)/int4.c:   $(src)/int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += int8.c +$(obj)/int8.c:   UNROLL := 8 +$(obj)/int8.c:   $(src)/int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += int16.c +$(obj)/int16.c:  UNROLL := 16 +$(obj)/int16.c:  $(src)/int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += int32.c +$(obj)/int32.c:  UNROLL := 32 +$(obj)/int32.c:  $(src)/int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_altivec1.o += $(altivec_flags) +targets += altivec1.c +$(obj)/altivec1.c:   UNROLL := 1 +$(obj)/altivec1.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_altivec2.o += $(altivec_flags) +targets += altivec2.c +$(obj)/altivec2.c:   UNROLL := 2 +$(obj)/altivec2.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_altivec4.o += $(altivec_flags) +targets += altivec4.c +$(obj)/altivec4.c:   UNROLL := 4 +$(obj)/altivec4.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_altivec8.o += $(altivec_flags) +targets += altivec8.c +$(obj)/altivec8.c:   UNROLL := 8 +$(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +quiet_cmd_mktable = TABLE   $@ +      cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) + +targets += tables.c +$(obj)/tables.c: $(obj)/mktables FORCE +	$(call if_changed,mktable) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c new file mode 100644 index 000000000000..b595f560bee7 --- /dev/null +++ b/lib/raid6/algos.c @@ -0,0 +1,154 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. 
Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/algos.c + * + * Algorithm list and algorithm selection for RAID-6 + */ + +#include <linux/raid/pq.h> +#ifndef __KERNEL__ +#include <sys/mman.h> +#include <stdio.h> +#else +#include <linux/gfp.h> +#if !RAID6_USE_EMPTY_ZERO_PAGE +/* In .bss so it's zeroed */ +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +EXPORT_SYMBOL(raid6_empty_zero_page); +#endif +#endif + +struct raid6_calls raid6_call; +EXPORT_SYMBOL_GPL(raid6_call); + +const struct raid6_calls * const raid6_algos[] = { +	&raid6_intx1, +	&raid6_intx2, +	&raid6_intx4, +	&raid6_intx8, +#if defined(__ia64__) +	&raid6_intx16, +	&raid6_intx32, +#endif +#if defined(__i386__) && !defined(__arch_um__) +	&raid6_mmxx1, +	&raid6_mmxx2, +	&raid6_sse1x1, +	&raid6_sse1x2, +	&raid6_sse2x1, +	&raid6_sse2x2, +#endif +#if defined(__x86_64__) && !defined(__arch_um__) +	&raid6_sse2x1, +	&raid6_sse2x2, +	&raid6_sse2x4, +#endif +#ifdef CONFIG_ALTIVEC +	&raid6_altivec1, +	&raid6_altivec2, +	&raid6_altivec4, +	&raid6_altivec8, +#endif +	NULL +}; + +#ifdef __KERNEL__ +#define RAID6_TIME_JIFFIES_LG2	4 +#else +/* Need more time to be stable in userspace */ +#define RAID6_TIME_JIFFIES_LG2	9 +#define time_before(x, y) ((x) < (y)) +#endif + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ +	const struct raid6_calls * const * algo; +	const struct raid6_calls * best; +	char *syndromes; +	void *dptrs[(65536/PAGE_SIZE)+2]; +	int i, disks; +	unsigned long perf, bestperf; +	int bestprefer; +	unsigned long j0, j1; + +	disks = (65536/PAGE_SIZE)+2; +	for ( i = 0 ; i < disks-2 ; i++ ) { +		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; +	} + +	/* Normal code - use a 2-page allocation to avoid D$ conflict */ +	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + +	if ( !syndromes ) { +		printk("raid6: Yikes!  No memory available.\n"); +		return -ENOMEM; +	} + +	dptrs[disks-2] = syndromes; +	dptrs[disks-1] = syndromes + PAGE_SIZE; + +	bestperf = 0;  bestprefer = 0;  best = NULL; + +	for ( algo = raid6_algos ; *algo ; algo++ ) { +		if ( !(*algo)->valid || (*algo)->valid() ) { +			perf = 0; + +			preempt_disable(); +			j0 = jiffies; +			while ( (j1 = jiffies) == j0 ) +				cpu_relax(); +			while (time_before(jiffies, +					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { +				(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); +				perf++; +			} +			preempt_enable(); + +			if ( (*algo)->prefer > bestprefer || +			     ((*algo)->prefer == bestprefer && +			      perf > bestperf) ) { +				best = *algo; +				bestprefer = best->prefer; +				bestperf = perf; +			} +			printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, +			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); +		} +	} + +	if (best) { +		printk("raid6: using algorithm %s (%ld MB/s)\n", +		       best->name, +		       (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); +		raid6_call = *best; +	} else +		printk("raid6: Yikes!  No algorithm found!\n"); + +	free_pages((unsigned long)syndromes, 1); + +	return best ? 
0 : -EINVAL; +} + +static void raid6_exit(void) +{ +	do { } while (0); +} + +subsys_initcall(raid6_select_algo); +module_exit(raid6_exit); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID6 Q-syndrome calculations"); diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc new file mode 100644 index 000000000000..2654d5c854be --- /dev/null +++ b/lib/raid6/altivec.uc @@ -0,0 +1,130 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6altivec$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + * + * <benh> hpa: in process, + * you can just "steal" the vec unit with enable_kernel_altivec() (but + * bracked this with preempt_disable/enable or in a lock) + */ + +#include <linux/raid/pq.h> + +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +# include <asm/system.h> +# include <asm/cputable.h> +#endif + +/* + * This is the C data type to use.  We use a vector of + * signed char so vec_cmpgt() will generate the right + * instruction. + */ + +typedef vector signed char unative_t; + +#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) +#define NSIZE	sizeof(unative_t) + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ +	return vec_add(v,v); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. 
+ */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ +	unative_t zv = NBYTES(0); + +	/* vec_cmpgt returns a vector bool char; thus the need for the cast */ +	return (unative_t)vec_cmpgt(zv, v); +} + + +/* This is noinline to make damned sure that gcc doesn't move any of the +   Altivec code around the enable/disable code */ +static void noinline +raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	unative_t wd$$, wq$$, wp$$, w1$$, w2$$; +	unative_t x1d = NBYTES(0x1d); + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { +		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; +			wp$$ = vec_xor(wp$$, wd$$); +			w2$$ = MASK(wq$$); +			w1$$ = SHLBYTE(wq$$); +			w2$$ = vec_and(w2$$, x1d); +			w1$$ = vec_xor(w1$$, w2$$); +			wq$$ = vec_xor(w1$$, wd$$); +		} +		*(unative_t *)&p[d+NSIZE*$$] = wp$$; +		*(unative_t *)&q[d+NSIZE*$$] = wq$$; +	} +} + +static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	preempt_disable(); +	enable_kernel_altivec(); + +	raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); + +	preempt_enable(); +} + +int raid6_have_altivec(void); +#if $# == 1 +int raid6_have_altivec(void) +{ +	/* This assumes either all CPUs have Altivec or none does */ +# ifdef __KERNEL__ +	return cpu_has_feature(CPU_FTR_ALTIVEC); +# else +	return 1; +# endif +} +#endif + +const struct raid6_calls raid6_altivec$# = { +	raid6_altivec$#_gen_syndrome, +	raid6_have_altivec, +	"altivecx$#", +	0 +}; + +#endif /* CONFIG_ALTIVEC */ diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc new file mode 100644 index 000000000000..d1e276a14fab --- /dev/null +++ b/lib/raid6/int.uc @@ -0,0 +1,117 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6int$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + */ + +#include <linux/raid/pq.h> + +/* + * This is the C data type to use + */ + +/* Change this from BITS_PER_LONG if there is something better... */ +#if BITS_PER_LONG == 64 +# define NBYTES(x) ((x) * 0x0101010101010101UL) +# define NSIZE  8 +# define NSHIFT 3 +# define NSTRING "64" +typedef u64 unative_t; +#else +# define NBYTES(x) ((x) * 0x01010101U) +# define NSIZE  4 +# define NSHIFT 2 +# define NSTRING "32" +typedef u32 unative_t; +#endif + + + +/* + * IA-64 wants insane amounts of unrolling.  On other architectures that + * is just a waste of space. + */ +#if ($# <= 8) || defined(__ia64__) + + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. 
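[Editor's note] In both the Altivec and integer variants, the SHLBYTE()/MASK() pair implements byte-wise multiplication by {02} in GF(2^8), with 0x1d as the reduction constant. The following is a standalone scalar sketch of that single step, written for reference only; it is not part of the diff.

/* Multiply one byte by {02} in GF(2^8): shift left, and if the high bit
 * was set, reduce by xoring in 0x1d (the low byte of the RAID-6
 * polynomial 0x11d). Compile and run as an ordinary userspace program. */
#include <stdint.h>
#include <stdio.h>

static uint8_t gf_mul2(uint8_t v)
{
	uint8_t mask = (v & 0x80) ? 0xff : 0x00;    /* MASK(): 0xff where high bit set */
	return (uint8_t)((v << 1) ^ (mask & 0x1d)); /* SHLBYTE(), then conditional reduce */
}

int main(void)
{
	/* 0x53 has a clear high bit -> 0xa6; 0x80 wraps -> 0x00 ^ 0x1d = 0x1d */
	printf("%02x %02x\n", gf_mul2(0x53), gf_mul2(0x80));
	return 0;
}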
+ */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ +	unative_t vv; + +	vv = (v << 1) & NBYTES(0xfe); +	return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ +	unative_t vv; + +	vv = v & NBYTES(0x80); +	vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ +	return vv; +} + + +static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { +		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; +			wp$$ ^= wd$$; +			w2$$ = MASK(wq$$); +			w1$$ = SHLBYTE(wq$$); +			w2$$ &= NBYTES(0x1d); +			w1$$ ^= w2$$; +			wq$$ = w1$$ ^ wd$$; +		} +		*(unative_t *)&p[d+NSIZE*$$] = wp$$; +		*(unative_t *)&q[d+NSIZE*$$] = wq$$; +	} +} + +const struct raid6_calls raid6_intx$# = { +	raid6_int$#_gen_syndrome, +	NULL,		/* always valid */ +	"int" NSTRING "x$#", +	0 +}; + +#endif diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c new file mode 100644 index 000000000000..3b1500843bba --- /dev/null +++ b/lib/raid6/mktables.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + *   This file is part of the Linux kernel, and is made available under + *   the terms of the GNU General Public License version 2 or (at your + *   option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * mktables.c + * + * Make RAID-6 tables.  This is a host user space program to be run at + * compile time. + */ + +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <stdlib.h> +#include <time.h> + +static uint8_t gfmul(uint8_t a, uint8_t b) +{ +	uint8_t v = 0; + +	while (b) { +		if (b & 1) +			v ^= a; +		a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); +		b >>= 1; +	} + +	return v; +} + +static uint8_t gfpow(uint8_t a, int b) +{ +	uint8_t v = 1; + +	b %= 255; +	if (b < 0) +		b += 255; + +	while (b) { +		if (b & 1) +			v = gfmul(v, a); +		a = gfmul(a, a); +		b >>= 1; +	} + +	return v; +} + +int main(int argc, char *argv[]) +{ +	int i, j, k; +	uint8_t v; +	uint8_t exptbl[256], invtbl[256]; + +	printf("#include <linux/raid/pq.h>\n"); + +	/* Compute multiplication table */ +	printf("\nconst u8  __attribute__((aligned(256)))\n" +		"raid6_gfmul[256][256] =\n" +		"{\n"); +	for (i = 0; i < 256; i++) { +		printf("\t{\n"); +		for (j = 0; j < 256; j += 8) { +			printf("\t\t"); +			for (k = 0; k < 8; k++) +				printf("0x%02x,%c", gfmul(i, j + k), +				       (k == 7) ? 
'\n' : ' '); +		} +		printf("\t},\n"); +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfmul);\n"); +	printf("#endif\n"); + +	/* Compute power-of-2 table (exponent) */ +	v = 1; +	printf("\nconst u8 __attribute__((aligned(256)))\n" +	       "raid6_gfexp[256] =\n" "{\n"); +	for (i = 0; i < 256; i += 8) { +		printf("\t"); +		for (j = 0; j < 8; j++) { +			exptbl[i + j] = v; +			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); +			v = gfmul(v, 2); +			if (v == 1) +				v = 0;	/* For entry 255, not a real entry */ +		} +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfexp);\n"); +	printf("#endif\n"); + +	/* Compute inverse table x^-1 == x^254 */ +	printf("\nconst u8 __attribute__((aligned(256)))\n" +	       "raid6_gfinv[256] =\n" "{\n"); +	for (i = 0; i < 256; i += 8) { +		printf("\t"); +		for (j = 0; j < 8; j++) { +			invtbl[i + j] = v = gfpow(i + j, 254); +			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); +		} +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfinv);\n"); +	printf("#endif\n"); + +	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */ +	printf("\nconst u8 __attribute__((aligned(256)))\n" +	       "raid6_gfexi[256] =\n" "{\n"); +	for (i = 0; i < 256; i += 8) { +		printf("\t"); +		for (j = 0; j < 8; j++) +			printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], +			       (j == 7) ? '\n' : ' '); +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfexi);\n"); +	printf("#endif\n"); + +	return 0; +} diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c new file mode 100644 index 000000000000..279347f23094 --- /dev/null +++ b/lib/raid6/mmx.c @@ -0,0 +1,142 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6/mmx.c + * + * MMX implementation of RAID-6 syndrome functions + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +/* Shared with raid6/sse1.c */ +const struct raid6_mmx_constants { +	u64 x1d; +} raid6_mmx_constants = { +	0x1d1d1d1d1d1d1d1dULL, +}; + +static int raid6_have_mmx(void) +{ +	/* Not really "boot_cpu" but "all_cpus" */ +	return boot_cpu_has(X86_FEATURE_MMX); +} + +/* + * Plain MMX implementation + */ +static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 8 ) { +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("movq %mm2,%mm4");	/* Q[0] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm6,%mm2"); +			asm volatile("pxor %mm6,%mm4"); +		} +		asm volatile("movq %%mm2,%0" : "=m" (p[d])); +		asm volatile("pxor %mm2,%mm2"); +		asm volatile("movq %%mm4,%0" : "=m" (q[d])); +		asm volatile("pxor %mm4,%mm4"); +	} + +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx1 = { +	raid6_mmx1_gen_syndrome, +	raid6_have_mmx, +	"mmxx1", +	0 +}; + +/* + * Unrolled-by-2 MMX implementation + */ +static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ +	asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 16 ) { +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); +		asm volatile("movq %mm2,%mm4"); /* Q[0] */ +		asm volatile("movq %mm3,%mm6"); /* Q[1] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("pcmpgtb %mm6,%mm7"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("paddb %mm6,%mm6"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pand %mm0,%mm7"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); +			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); +			asm volatile("pxor %mm5,%mm2"); +			asm volatile("pxor %mm7,%mm3"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm7,%mm7"); +		} +		asm volatile("movq %%mm2,%0" : "=m" (p[d])); +		asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); +		asm volatile("movq %%mm4,%0" : "=m" (q[d])); +		asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); +	} + +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx2 = { +	raid6_mmx2_gen_syndrome, +	raid6_have_mmx, +	"mmxx2", +	0 +}; + +#endif diff --git 
a/lib/raid6/recov.c b/lib/raid6/recov.c new file mode 100644 index 000000000000..8590d19cf522 --- /dev/null +++ b/lib/raid6/recov.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/recov.c + * + * RAID-6 data recovery in dual failure mode.  In single failure mode, + * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct + * the syndrome.) + */ + +#include <linux/raid/pq.h> + +/* Recover two failed data blocks. */ +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, +		       void **ptrs) +{ +	u8 *p, *q, *dp, *dq; +	u8 px, qx, db; +	const u8 *pbmul;	/* P multiplier table for B data */ +	const u8 *qmul;		/* Q multiplier table (for both) */ + +	p = (u8 *)ptrs[disks-2]; +	q = (u8 *)ptrs[disks-1]; + +	/* Compute syndrome with zero for the missing data pages +	   Use the dead data pages as temporary storage for +	   delta p and delta q */ +	dp = (u8 *)ptrs[faila]; +	ptrs[faila] = (void *)raid6_empty_zero_page; +	ptrs[disks-2] = dp; +	dq = (u8 *)ptrs[failb]; +	ptrs[failb] = (void *)raid6_empty_zero_page; +	ptrs[disks-1] = dq; + +	raid6_call.gen_syndrome(disks, bytes, ptrs); + +	/* Restore pointer table */ +	ptrs[faila]   = dp; +	ptrs[failb]   = dq; +	ptrs[disks-2] = p; +	ptrs[disks-1] = q; + +	/* Now, pick the proper data tables */ +	pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; +	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; + +	/* Now do it... */ +	while ( bytes-- ) { +		px    = *p ^ *dp; +		qx    = qmul[*q ^ *dq]; +		*dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ +		*dp++ = db ^ px; /* Reconstructed A */ +		p++; q++; +	} +} +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +/* Recover failure of one data block plus the P block */ +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +{ +	u8 *p, *q, *dq; +	const u8 *qmul;		/* Q multiplier table */ + +	p = (u8 *)ptrs[disks-2]; +	q = (u8 *)ptrs[disks-1]; + +	/* Compute syndrome with zero for the missing data page +	   Use the dead data page as temporary storage for delta q */ +	dq = (u8 *)ptrs[faila]; +	ptrs[faila] = (void *)raid6_empty_zero_page; +	ptrs[disks-1] = dq; + +	raid6_call.gen_syndrome(disks, bytes, ptrs); + +	/* Restore pointer table */ +	ptrs[faila]   = dq; +	ptrs[disks-1] = q; + +	/* Now, pick the proper data tables */ +	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; + +	/* Now do it... */ +	while ( bytes-- ) { +		*p++ ^= *dq = qmul[*q ^ *dq]; +		q++; dq++; +	} +} +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +#ifndef __KERNEL__ +/* Testing only */ + +/* Recover two failed blocks. */ +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) +{ +	if ( faila > failb ) { +		int tmp = faila; +		faila = failb; +		failb = tmp; +	} + +	if ( failb == disks-1 ) { +		if ( faila == disks-2 ) { +			/* P+Q failure.  Just rebuild the syndrome. */ +			raid6_call.gen_syndrome(disks, bytes, ptrs); +		} else { +			/* data+Q failure.  
Reconstruct data from P, +			   then rebuild syndrome. */ +			/* NOT IMPLEMENTED - equivalent to RAID-5 */ +		} +	} else { +		if ( failb == disks-2 ) { +			/* data+P failure. */ +			raid6_datap_recov(disks, bytes, faila, ptrs); +		} else { +			/* data+data failure. */ +			raid6_2data_recov(disks, bytes, faila, failb, ptrs); +		} +	} +} + +#endif diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c new file mode 100644 index 000000000000..10dd91948c07 --- /dev/null +++ b/lib/raid6/sse1.c @@ -0,0 +1,162 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/sse1.c + * + * SSE-1/MMXEXT implementation of RAID-6 syndrome functions + * + * This is really an MMX implementation, but it requires SSE-1 or + * AMD MMXEXT for prefetch support and a few other features.  The + * support for nontemporal memory accesses is enough to make this + * worthwhile as a separate implementation. + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +/* Defined in raid6/mmx.c */ +extern const struct raid6_mmx_constants { +	u64 x1d; +} raid6_mmx_constants; + +static int raid6_have_sse1_or_mmxext(void) +{ +	/* Not really boot_cpu but "all_cpus" */ +	return boot_cpu_has(X86_FEATURE_MMX) && +		(boot_cpu_has(X86_FEATURE_XMM) || +		 boot_cpu_has(X86_FEATURE_MMXEXT)); +} + +/* + * Plain SSE1 implementation + */ +static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 8 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); +		asm volatile("movq %mm2,%mm4");	/* Q[0] */ +		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); +		for ( z = z0-2 ; z >= 0 ; z-- ) { +			asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm6,%mm2"); +			asm volatile("pxor %mm6,%mm4"); +			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); +		} +		asm volatile("pcmpgtb %mm4,%mm5"); +		asm volatile("paddb %mm4,%mm4"); +		asm volatile("pand %mm0,%mm5"); +		asm volatile("pxor %mm5,%mm4"); +		asm volatile("pxor %mm5,%mm5"); +		asm volatile("pxor %mm6,%mm2"); +		asm volatile("pxor %mm6,%mm4"); + +		asm volatile("movntq %%mm2,%0" : "=m" (p[d])); +		asm volatile("movntq %%mm4,%0" : "=m" (q[d])); +	} + +	asm volatile("sfence" : : : "memory"); +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x1 = { +	raid6_sse11_gen_syndrome, +	
raid6_have_sse1_or_mmxext, +	"sse1x1", +	1			/* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE1 implementation + */ +static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ +	asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + +	/* We uniformly assume a single prefetch covers at least 16 bytes */ +	for ( d = 0 ; d < bytes ; d += 16 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ +		asm volatile("movq %mm2,%mm4");	/* Q[0] */ +		asm volatile("movq %mm3,%mm6"); /* Q[1] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("pcmpgtb %mm6,%mm7"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("paddb %mm6,%mm6"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pand %mm0,%mm7"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); +			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); +			asm volatile("pxor %mm5,%mm2"); +			asm volatile("pxor %mm7,%mm3"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm7,%mm7"); +		} +		asm volatile("movntq %%mm2,%0" : "=m" (p[d])); +		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); +		asm volatile("movntq %%mm4,%0" : "=m" (q[d])); +		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); +	} + +	asm volatile("sfence" : :: "memory"); +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x2 = { +	raid6_sse12_gen_syndrome, +	raid6_have_sse1_or_mmxext, +	"sse1x2", +	1			/* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c new file mode 100644 index 000000000000..bc2d57daa589 --- /dev/null +++ b/lib/raid6/sse2.c @@ -0,0 +1,262 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. 
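[Editor's note] Stepping back from the per-architecture syndrome routines for a moment: the algebra implemented by raid6_2data_recov() in recov.c above can be summarized as follows (standard RAID-6 notation, as in H. Peter Anvin's raid6 paper; written here for reference, not part of the diff). With data blocks D_0..D_{n-1}, generator g = {02} in GF(2^8), failed data blocks x < y, and P', Q' the syndromes recomputed with the failed blocks zeroed:

P = \bigoplus_{i=0}^{n-1} D_i, \qquad Q = \bigoplus_{i=0}^{n-1} g^{i} D_i

P_{xy} = P \oplus P', \qquad Q_{xy} = Q \oplus Q'

D_y = (g^{y-x} \oplus 1)^{-1}\, P_{xy} \;\oplus\; (g^{x} \oplus g^{y})^{-1}\, Q_{xy}, \qquad D_x = P_{xy} \oplus D_y

In the code, pbmul selects the (g^{y-x} ⊕ 1)^{-1} row of raid6_gfmul via raid6_gfexi, and qmul selects the (g^x ⊕ g^y)^{-1} row via raid6_gfinv and raid6_gfexp, exactly as generated by mktables.c earlier in this diff.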
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6/sse2.c + * + * SSE-2 implementation of RAID-6 syndrome functions + * + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +static const struct raid6_sse_constants { +	u64 x1d[2]; +} raid6_sse_constants  __attribute__((aligned(16))) = { +	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, +}; + +static int raid6_have_sse2(void) +{ +	/* Not really boot_cpu but "all_cpus" */ +	return boot_cpu_has(X86_FEATURE_MMX) && +		boot_cpu_has(X86_FEATURE_FXSR) && +		boot_cpu_has(X86_FEATURE_XMM) && +		boot_cpu_has(X86_FEATURE_XMM2); +} + +/* + * Plain SSE2 implementation + */ +static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); +	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 16 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); +		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ +		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); +		for ( z = z0-2 ; z >= 0 ; z-- ) { +			asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %xmm4,%xmm5"); +			asm volatile("paddb %xmm4,%xmm4"); +			asm volatile("pand %xmm0,%xmm5"); +			asm volatile("pxor %xmm5,%xmm4"); +			asm volatile("pxor %xmm5,%xmm5"); +			asm volatile("pxor %xmm6,%xmm2"); +			asm volatile("pxor %xmm6,%xmm4"); +			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); +		} +		asm volatile("pcmpgtb %xmm4,%xmm5"); +		asm volatile("paddb %xmm4,%xmm4"); +		asm volatile("pand %xmm0,%xmm5"); +		asm volatile("pxor %xmm5,%xmm4"); +		asm volatile("pxor %xmm5,%xmm5"); +		asm volatile("pxor %xmm6,%xmm2"); +		asm volatile("pxor %xmm6,%xmm4"); + +		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); +		asm volatile("pxor %xmm2,%xmm2"); +		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); +		asm volatile("pxor %xmm4,%xmm4"); +	} + +	asm volatile("sfence" : : : "memory"); +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x1 = { +	raid6_sse21_gen_syndrome, +	raid6_have_sse2, +	"sse2x1", +	1			/* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE2 implementation + */ +static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); +	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ +	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ + +	/* We uniformly assume a single prefetch covers at least 32 bytes */ +	for ( d = 0 ; d < bytes ; d += 32 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */ +		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ +		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ +		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			asm volatile("prefetchnta 
%0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %xmm4,%xmm5"); +			asm volatile("pcmpgtb %xmm6,%xmm7"); +			asm volatile("paddb %xmm4,%xmm4"); +			asm volatile("paddb %xmm6,%xmm6"); +			asm volatile("pand %xmm0,%xmm5"); +			asm volatile("pand %xmm0,%xmm7"); +			asm volatile("pxor %xmm5,%xmm4"); +			asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); +			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); +			asm volatile("pxor %xmm5,%xmm2"); +			asm volatile("pxor %xmm7,%xmm3"); +			asm volatile("pxor %xmm5,%xmm4"); +			asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("pxor %xmm5,%xmm5"); +			asm volatile("pxor %xmm7,%xmm7"); +		} +		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); +		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); +		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); +		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); +	} + +	asm volatile("sfence" : : : "memory"); +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x2 = { +	raid6_sse22_gen_syndrome, +	raid6_have_sse2, +	"sse2x2", +	1			/* Has cache hints */ +}; + +#endif + +#if defined(__x86_64__) && !defined(__arch_um__) + +/* + * Unrolled-by-4 SSE2 implementation + */ +static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); +	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */ +	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */ +	asm volatile("pxor %xmm4,%xmm4"); 	/* Q[0] */ +	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */ +	asm volatile("pxor %xmm6,%xmm6"); 	/* Q[1] */ +	asm volatile("pxor %xmm7,%xmm7"); 	/* Zero temp */ +	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */ +	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */ +	asm volatile("pxor %xmm12,%xmm12"); 	/* Q[2] */ +	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */ +	asm volatile("pxor %xmm14,%xmm14"); 	/* Q[3] */ +	asm volatile("pxor %xmm15,%xmm15"); 	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 64 ) { +		for ( z = z0 ; z >= 0 ; z-- ) { +			/* The second prefetch seems to improve performance... 
*/ +			asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); +			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); +			asm volatile("pcmpgtb %xmm4,%xmm5"); +			asm volatile("pcmpgtb %xmm6,%xmm7"); +			asm volatile("pcmpgtb %xmm12,%xmm13"); +			asm volatile("pcmpgtb %xmm14,%xmm15"); +			asm volatile("paddb %xmm4,%xmm4"); +			asm volatile("paddb %xmm6,%xmm6"); +			asm volatile("paddb %xmm12,%xmm12"); +			asm volatile("paddb %xmm14,%xmm14"); +			asm volatile("pand %xmm0,%xmm5"); +			asm volatile("pand %xmm0,%xmm7"); +			asm volatile("pand %xmm0,%xmm13"); +			asm volatile("pand %xmm0,%xmm15"); +			asm volatile("pxor %xmm5,%xmm4"); +			asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("pxor %xmm13,%xmm12"); +			asm volatile("pxor %xmm15,%xmm14"); +			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); +			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); +			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); +			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); +			asm volatile("pxor %xmm5,%xmm2"); +			asm volatile("pxor %xmm7,%xmm3"); +			asm volatile("pxor %xmm13,%xmm10"); +			asm volatile("pxor %xmm15,%xmm11"); +			asm volatile("pxor %xmm5,%xmm4"); +			asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("pxor %xmm13,%xmm12"); +			asm volatile("pxor %xmm15,%xmm14"); +			asm volatile("pxor %xmm5,%xmm5"); +			asm volatile("pxor %xmm7,%xmm7"); +			asm volatile("pxor %xmm13,%xmm13"); +			asm volatile("pxor %xmm15,%xmm15"); +		} +		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); +		asm volatile("pxor %xmm2,%xmm2"); +		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); +		asm volatile("pxor %xmm3,%xmm3"); +		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); +		asm volatile("pxor %xmm10,%xmm10"); +		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); +		asm volatile("pxor %xmm11,%xmm11"); +		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); +		asm volatile("pxor %xmm4,%xmm4"); +		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); +		asm volatile("pxor %xmm6,%xmm6"); +		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); +		asm volatile("pxor %xmm12,%xmm12"); +		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); +		asm volatile("pxor %xmm14,%xmm14"); +	} + +	asm volatile("sfence" : : : "memory"); +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x4 = { +	raid6_sse24_gen_syndrome, +	raid6_have_sse2, +	"sse2x4", +	1			/* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile new file mode 100644 index 000000000000..aa651697b6dc --- /dev/null +++ b/lib/raid6/test/Makefile @@ -0,0 +1,72 @@ +# +# This is a simple Makefile to test some of the RAID-6 code +# from userspace. +# + +CC	 = gcc +OPTFLAGS = -O2			# Adjust as desired +CFLAGS	 = -I.. 
-I ../../../include -g $(OPTFLAGS) +LD	 = ld +AWK	 = awk -f +AR	 = ar +RANLIB	 = ranlib + +.c.o: +	$(CC) $(CFLAGS) -c -o $@ $< + +%.c: ../%.c +	cp -f $< $@ + +%.uc: ../%.uc +	cp -f $< $@ + +all:	raid6.a raid6test + +raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ +	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \ +	 tables.o +	 rm -f $@ +	 $(AR) cq $@ $^ +	 $(RANLIB) $@ + +raid6test: test.c raid6.a +	$(CC) $(CFLAGS) -o raid6test $^ + +altivec1.c: altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=1 < altivec.uc > $@ + +altivec2.c: altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=2 < altivec.uc > $@ + +altivec4.c: altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=4 < altivec.uc > $@ + +altivec8.c: altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ + +int1.c: int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=1 < int.uc > $@ + +int2.c: int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=2 < int.uc > $@ + +int4.c: int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=4 < int.uc > $@ + +int8.c: int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=8 < int.uc > $@ + +int16.c: int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=16 < int.uc > $@ + +int32.c: int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=32 < int.uc > $@ + +tables.c: mktables +	./mktables > tables.c + +clean: +	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test + +spotless: clean +	rm -f *~ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c new file mode 100644 index 000000000000..7a930318b17d --- /dev/null +++ b/lib/raid6/test/test.c @@ -0,0 +1,124 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + *   This file is part of the Linux kernel, and is made available under + *   the terms of the GNU General Public License version 2 or (at your + *   option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6test.c + * + * Test RAID-6 recovery with various algorithms + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <linux/raid/pq.h> + +#define NDISKS		16	/* Including P and Q */ + +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +struct raid6_calls raid6_call; + +char *dataptrs[NDISKS]; +char data[NDISKS][PAGE_SIZE]; +char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; + +static void makedata(void) +{ +	int i, j; + +	for (i = 0; i < NDISKS; i++) { +		for (j = 0; j < PAGE_SIZE; j++) +			data[i][j] = rand(); + +		dataptrs[i] = data[i]; +	} +} + +static char disk_type(int d) +{ +	switch (d) { +	case NDISKS-2: +		return 'P'; +	case NDISKS-1: +		return 'Q'; +	default: +		return 'D'; +	} +} + +static int test_disks(int i, int j) +{ +	int erra, errb; + +	memset(recovi, 0xf0, PAGE_SIZE); +	memset(recovj, 0xba, PAGE_SIZE); + +	dataptrs[i] = recovi; +	dataptrs[j] = recovj; + +	raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); + +	erra = memcmp(data[i], recovi, PAGE_SIZE); +	errb = memcmp(data[j], recovj, PAGE_SIZE); + +	if (i < NDISKS-2 && j == NDISKS-1) { +		/* We don't implement the DQ failure scenario, since it's +		   equivalent to a RAID-5 failure (XOR, then recompute Q) */ +		erra = errb = 0; +	} else { +		printf("algo=%-8s  faila=%3d(%c)  failb=%3d(%c)  %s\n", +		       raid6_call.name, +		       i, disk_type(i), +		       j, disk_type(j), +		       (!erra && !errb) ? "OK" : +		       !erra ? 
"ERRB" : +		       !errb ? "ERRA" : "ERRAB"); +	} + +	dataptrs[i] = data[i]; +	dataptrs[j] = data[j]; + +	return erra || errb; +} + +int main(int argc, char *argv[]) +{ +	const struct raid6_calls *const *algo; +	int i, j; +	int err = 0; + +	makedata(); + +	for (algo = raid6_algos; *algo; algo++) { +		if (!(*algo)->valid || (*algo)->valid()) { +			raid6_call = **algo; + +			/* Nuke syndromes */ +			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + +			/* Generate assumed good syndrome */ +			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, +						(void **)&dataptrs); + +			for (i = 0; i < NDISKS-1; i++) +				for (j = i+1; j < NDISKS; j++) +					err += test_disks(i, j); +		} +		printf("\n"); +	} + +	printf("\n"); +	/* Pick the best algorithm test */ +	raid6_select_algo(); + +	if (err) +		printf("\n*** ERRORS FOUND ***\n"); + +	return err; +} diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk new file mode 100644 index 000000000000..c6aa03631df8 --- /dev/null +++ b/lib/raid6/unroll.awk @@ -0,0 +1,20 @@ + +# This filter requires one command line option of form -vN=n +# where n must be a decimal number. +# +# Repeat each input line containing $$ n times, replacing $$ with 0...n-1. +# Replace each $# with n, and each $* with a single $. + +BEGIN { +	n = N + 0 +} +{ +	if (/\$\$/) { rep = n } else { rep = 1 } +	for (i = 0; i < rep; ++i) { +		tmp = $0 +		gsub(/\$\$/, i, tmp) +		gsub(/\$\#/, n, tmp) +		gsub(/\$\*/, "$", tmp) +		print tmp +	} +} diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h new file mode 100644 index 000000000000..cb2a8c91c886 --- /dev/null +++ b/lib/raid6/x86.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------------- * + * + *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/x86.h + * + * Definitions common to x86 and x86-64 RAID-6 code only + */ + +#ifndef LINUX_RAID_RAID6X86_H +#define LINUX_RAID_RAID6X86_H + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#ifdef __KERNEL__ /* Real code */ + +#include <asm/i387.h> + +#else /* Dummy code for user space testing */ + +static inline void kernel_fpu_begin(void) +{ +} + +static inline void kernel_fpu_end(void) +{ +} + +#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions +					   * (fast save and restore) */ +#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */ + +/* Should work well enough on modern CPUs for testing */ +static inline int boot_cpu_has(int flag) +{ +	u32 eax = (flag >> 5) ? 
0x80000001 : 1; +	u32 edx; + +	asm volatile("cpuid" +		     : "+a" (eax), "=d" (edx) +		     : : "ecx", "ebx"); + +	return (edx >> (flag & 31)) & 1; +} + +#endif /* ndef __KERNEL__ */ + +#endif +#endif diff --git a/lib/rwsem.c b/lib/rwsem.c index ceba8e28807a..f236d7cd5cf3 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -36,45 +36,56 @@ struct rwsem_waiter {  #define RWSEM_WAITING_FOR_WRITE	0x00000002  }; +/* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and + * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held + * since the rwsem value was observed. + */ +#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */ +#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */ +#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ +  /*   * handle the lock release when processes blocked on it that can now run   * - if we come here from up_xxxx(), then:   *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)   *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) - *   - there must be someone on the queue + * - there must be someone on the queue   * - the spinlock must be held by the caller   * - woken process blocks are discarded from the list after having task zeroed   * - writers are only woken if downgrading is false   */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) +static struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)  {  	struct rwsem_waiter *waiter;  	struct task_struct *tsk;  	struct list_head *next; -	signed long oldcount, woken, loop; - -	if (downgrading) -		goto dont_wake_writers; - -	/* if we came through an up_xxxx() call, we only only wake someone up -	 * if we can transition the active part of the count from 0 -> 1 -	 */ - try_again: -	oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) -						- RWSEM_ACTIVE_BIAS; -	if (oldcount & RWSEM_ACTIVE_MASK) -		goto undo; +	signed long oldcount, woken, loop, adjustment;  	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - -	/* try to grant a single write lock if there's a writer at the front -	 * of the queue - note we leave the 'active part' of the count -	 * incremented by 1 and the waiting part incremented by 0x00010000 -	 */  	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))  		goto readers_only; +	if (wake_type == RWSEM_WAKE_READ_OWNED) +		/* Another active reader was observed, so wakeup is not +		 * likely to succeed. Save the atomic op. +		 */ +		goto out; + +	/* There's a writer at the front of the queue - try to grant it the +	 * write lock.  However, we only wake this writer if we can transition +	 * the active part of the count from 0 -> 1 +	 */ +	adjustment = RWSEM_ACTIVE_WRITE_BIAS; +	if (waiter->list.next == &sem->wait_list) +		adjustment -= RWSEM_WAITING_BIAS; + + try_again_write: +	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; +	if (oldcount & RWSEM_ACTIVE_MASK) +		/* Someone grabbed the sem already */ +		goto undo_write; +  	/* We must be careful not to touch 'waiter' after we set ->task = NULL.  	 * It is an allocated on the waiter's stack and may become invalid at  	 * any time after that point (due to a wakeup from another source). 
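
The rwsem hunk above replaces the old downgrading flag with explicit wake types and computes a single signed adjustment against sem->count, whose low bits track active lockers while one negative waiting bias is held whenever the wait list is non-empty. Below is a minimal userspace sketch of the try_again_write check; the macro names, bias values and the atomic helper are assumptions chosen to mirror the usual 32-bit layout, not the kernel's rwsem code.

#include <stdatomic.h>
#include <stdio.h>

/* Assumed layout, for illustration only: the low 16 bits count active
 * lockers, and a single negative waiting bias is held while the wait
 * list is non-empty. */
#define ACTIVE_MASK        0x0000ffffL
#define ACTIVE_BIAS        0x00000001L
#define WAITING_BIAS       (-0x00010000L)
#define ACTIVE_WRITE_BIAS  (WAITING_BIAS + ACTIVE_BIAS)

static _Atomic long count = WAITING_BIAS;  /* lock free, one writer queued */

int main(void)
{
	/* As in try_again_write above: claim the write bias, and also drop
	 * the waiting bias because the queue is about to become empty. */
	long adjustment = ACTIVE_WRITE_BIAS - WAITING_BIAS;

	/* atomic_fetch_add() returns the previous value, i.e. what the kernel
	 * computes as rwsem_atomic_update(adjustment, sem) - adjustment. */
	long oldcount = atomic_fetch_add(&count, adjustment);

	if (oldcount & ACTIVE_MASK)
		puts("raced with an active locker: undo the adjustment and retry");
	else
		printf("write lock granted, count is now %ld (ACTIVE_WRITE_BIAS)\n",
		       atomic_load(&count));
	return 0;
}

The point of the mask test is that any concurrently active reader or writer shows up in the low bits of oldcount, in which case the adjustment is undone and the grant retried.
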
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)  	put_task_struct(tsk);  	goto out; -	/* don't want to wake any writers */ - dont_wake_writers: -	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); -	if (waiter->flags & RWSEM_WAITING_FOR_WRITE) + readers_only: +	/* If we come here from up_xxxx(), another thread might have reached +	 * rwsem_down_failed_common() before we acquired the spinlock and +	 * woken up a waiter, making it now active.  We prefer to check for +	 * this first in order to not spend too much time with the spinlock +	 * held if we're not going to be able to wake up readers in the end. +	 * +	 * Note that we do not need to update the rwsem count: any writer +	 * trying to acquire rwsem will run rwsem_down_write_failed() due +	 * to the waiting threads and block trying to acquire the spinlock. +	 * +	 * We use a dummy atomic update in order to acquire the cache line +	 * exclusively since we expect to succeed and run the final rwsem +	 * count adjustment pretty soon. +	 */ +	if (wake_type == RWSEM_WAKE_ANY && +	    rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) +		/* Someone grabbed the sem for write already */  		goto out; -	/* grant an infinite number of read locks to the readers at the front -	 * of the queue -	 * - note we increment the 'active part' of the count by the number of -	 *   readers before waking any processes up +	/* Grant an infinite number of read locks to the readers at the front +	 * of the queue.  Note we increment the 'active part' of the count by +	 * the number of readers before waking any processes up.  	 */ - readers_only:  	woken = 0;  	do {  		woken++; @@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)  	} while (waiter->flags & RWSEM_WAITING_FOR_READ); -	loop = woken; -	woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; -	if (!downgrading) -		/* we'd already done one increment earlier */ -		woken -= RWSEM_ACTIVE_BIAS; +	adjustment = woken * RWSEM_ACTIVE_READ_BIAS; +	if (waiter->flags & RWSEM_WAITING_FOR_READ) +		/* hit end of list above */ +		adjustment -= RWSEM_WAITING_BIAS; -	rwsem_atomic_add(woken, sem); +	rwsem_atomic_add(adjustment, sem);  	next = sem->wait_list.next; -	for (; loop > 0; loop--) { +	for (loop = woken; loop > 0; loop--) {  		waiter = list_entry(next, struct rwsem_waiter, list);  		next = waiter->list.next;  		tsk = waiter->task; @@ -138,10 +160,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)  	/* undo the change to the active count, but check for a transition  	 * 1->0 */ - undo: -	if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK) + undo_write: +	if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)  		goto out; -	goto try_again; +	goto try_again_write;  }  /* @@ -149,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)   */  static struct rw_semaphore __sched *  rwsem_down_failed_common(struct rw_semaphore *sem, -			struct rwsem_waiter *waiter, signed long adjustment) +			 unsigned int flags, signed long adjustment)  { +	struct rwsem_waiter waiter;  	struct task_struct *tsk = current;  	signed long count; @@ -158,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,  	/* set up my own style of waitqueue */  	spin_lock_irq(&sem->wait_lock); -	waiter->task = tsk; +	waiter.task = tsk; +	waiter.flags = flags;  	get_task_struct(tsk); -	list_add_tail(&waiter->list, &sem->wait_list); +	if (list_empty(&sem->wait_list)) +		adjustment += RWSEM_WAITING_BIAS; +	
list_add_tail(&waiter.list, &sem->wait_list); -	/* we're now waiting on the lock, but no longer actively read-locking */ +	/* we're now waiting on the lock, but no longer actively locking */  	count = rwsem_atomic_update(adjustment, sem); -	/* if there are no active locks, wake the front queued process(es) up */ -	if (!(count & RWSEM_ACTIVE_MASK)) -		sem = __rwsem_do_wake(sem, 0); +	/* If there are no active locks, wake the front queued process(es) up. +	 * +	 * Alternatively, if we're called from a failed down_write(), there +	 * were already threads queued before us and there are no active +	 * writers, the lock must be read owned; so we try to wake any read +	 * locks that were queued ahead of us. */ +	if (count == RWSEM_WAITING_BIAS) +		sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); +	else if (count > RWSEM_WAITING_BIAS && +		 adjustment == -RWSEM_ACTIVE_WRITE_BIAS) +		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);  	spin_unlock_irq(&sem->wait_lock);  	/* wait to be given the lock */  	for (;;) { -		if (!waiter->task) +		if (!waiter.task)  			break;  		schedule();  		set_task_state(tsk, TASK_UNINTERRUPTIBLE); @@ -191,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,  asmregparm struct rw_semaphore __sched *  rwsem_down_read_failed(struct rw_semaphore *sem)  { -	struct rwsem_waiter waiter; - -	waiter.flags = RWSEM_WAITING_FOR_READ; -	rwsem_down_failed_common(sem, &waiter, -				RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); -	return sem; +	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, +					-RWSEM_ACTIVE_READ_BIAS);  }  /* @@ -205,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)  asmregparm struct rw_semaphore __sched *  rwsem_down_write_failed(struct rw_semaphore *sem)  { -	struct rwsem_waiter waiter; - -	waiter.flags = RWSEM_WAITING_FOR_WRITE; -	rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); - -	return sem; +	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, +					-RWSEM_ACTIVE_WRITE_BIAS);  }  /* @@ -225,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)  	/* do nothing if list empty */  	if (!list_empty(&sem->wait_list)) -		sem = __rwsem_do_wake(sem, 0); +		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);  	spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -245,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)  	/* do nothing if list empty */  	if (!list_empty(&sem->wait_list)) -		sem = __rwsem_do_wake(sem, 1); +		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);  	spin_unlock_irqrestore(&sem->wait_lock, flags); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9afa25b52a83..a5ec42868f99 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -10,6 +10,7 @@  #include <linux/slab.h>  #include <linux/scatterlist.h>  #include <linux/highmem.h> +#include <linux/kmemleak.h>  /**   * sg_next - return the next scatterlist entry in a list @@ -115,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one);   */  static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)  { -	if (nents == SG_MAX_SINGLE_ALLOC) -		return (struct scatterlist *) __get_free_page(gfp_mask); -	else +	if (nents == SG_MAX_SINGLE_ALLOC) { +		/* +		 * Kmemleak doesn't track page allocations as they are not +		 * commonly used (in a raw form) for kernel data structures. 
+		 * As we chain together a list of pages and then a normal
+		 * kmalloc (tracked by kmemleak), in order for that last
+		 * allocation not to become decoupled (and thus a
+		 * false-positive) we need to inform kmemleak of all the
+		 * intermediate allocations.
+		 */
+		void *ptr = (void *) __get_free_page(gfp_mask);
+		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
+		return ptr;
+	} else
 		return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
 }
 
 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
 {
-	if (nents == SG_MAX_SINGLE_ALLOC)
+	if (nents == SG_MAX_SINGLE_ALLOC) {
+		kmemleak_free(sg);
 		free_page((unsigned long) sg);
-	else
+	} else
 		kfree(sg);
 }
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 4ee19d0d3910..7af9d841c43b 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -146,19 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
 {
 	char *tail;
 	unsigned long val;
-	size_t len;
 
 	*res = 0;
-	len = strlen(cp);
-	if (len == 0)
+	if (!*cp)
 		return -EINVAL;
 
 	val = simple_strtoul(cp, &tail, base);
 	if (tail == cp)
 		return -EINVAL;
 
-	if ((*tail == '\0') ||
-		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
+	if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
 		*res = val;
 		return 0;
 	}
@@ -220,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
 {
 	char *tail;
 	unsigned long long val;
-	size_t len;
 
 	*res = 0;
-	len = strlen(cp);
-	if (len == 0)
+	if (!*cp)
 		return -EINVAL;
 
 	val = simple_strtoull(cp, &tail, base);
 	if (tail == cp)
 		return -EINVAL;
-	if ((*tail == '\0') ||
-		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
+	if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
 		*res = val;
 		return 0;
 	}
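
The vsprintf.c hunks above drop the strlen() pass from strict_strtoul() and strict_strtoull(): the tail left over after parsing may now only be empty or a single trailing newline. A rough userspace approximation of the new tail test follows; strtoul() stands in for simple_strtoul(), and strict_parse_ul() and the sample strings are made up for illustration, not kernel code.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical helper mimicking the new tail test: accept "123" or "123\n",
 * reject embedded or trailing junk. */
static int strict_parse_ul(const char *cp, unsigned int base, unsigned long *res)
{
	char *tail;
	unsigned long val;

	*res = 0;
	if (!*cp)
		return -EINVAL;

	val = strtoul(cp, &tail, base);
	if (tail == cp)
		return -EINVAL;

	if (tail[0] == '\0' || (tail[0] == '\n' && tail[1] == '\0')) {
		*res = val;
		return 0;
	}
	return -EINVAL;
}

int main(void)
{
	const char *samples[] = { "42", "42\n", "42\nx", "4q2", "" };
	unsigned long val;

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int rc = strict_parse_ul(samples[i], 10, &val);
		printf("sample %u: rc=%d val=%lu\n", i, rc, val);
	}
	return 0;
}

With these samples, the first two should be accepted and the rest rejected, which is the behaviour the simplified kernel check encodes.
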

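
For reference, the arithmetic behind the raid6/sse2.c syndrome loops earlier in this diff: the pcmpgtb/paddb/pand/pxor sequence with the 0x1d1d... constant is a 16-bytes-at-a-time multiply by 2 in GF(2^8) over the RAID-6 generator polynomial 0x11d, with Q built by Horner evaluation while P is the plain XOR parity. A scalar C sketch of the same recurrence follows, loosely modelled on the generic int.uc algorithm; gf_mul2(), gen_syndrome_ref() and the tiny test stripe are illustrative names only, not the kernel's code.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Multiply one byte by 2 in GF(2^8) modulo 0x11d: shift left, then XOR in
 * 0x1d wherever the high bit was set.  pcmpgtb against a zero register
 * builds the 0x00/0xff mask from that high bit, pand reduces it to 0x1d,
 * paddb is the byte-wise doubling, and pxor folds the mask back in. */
static inline uint8_t gf_mul2(uint8_t v)
{
	return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0x00));
}

/* Reference syndrome: P is plain XOR parity, Q is the Horner evaluation
 * Q = D[z0]*2^z0 ^ ... ^ D[1]*2 ^ D[0], computed one byte at a time. */
static void gen_syndrome_ref(int disks, size_t bytes, uint8_t **dptr)
{
	int z0 = disks - 3;		/* highest data disk */
	uint8_t *p = dptr[z0 + 1];	/* XOR parity */
	uint8_t *q = dptr[z0 + 2];	/* RS syndrome */

	for (size_t d = 0; d < bytes; d++) {
		uint8_t wp = dptr[z0][d];
		uint8_t wq = wp;

		for (int z = z0 - 1; z >= 0; z--) {
			wp ^= dptr[z][d];
			wq = gf_mul2(wq) ^ dptr[z][d];
		}
		p[d] = wp;
		q[d] = wq;
	}
}

int main(void)
{
	enum { DISKS = 4, BYTES = 4 };	/* two data disks plus P and Q */
	uint8_t d0[BYTES] = { 0x01, 0x02, 0x80, 0xff };
	uint8_t d1[BYTES] = { 0x10, 0x20, 0x80, 0x0f };
	uint8_t p[BYTES], q[BYTES];
	uint8_t *ptrs[DISKS] = { d0, d1, p, q };

	gen_syndrome_ref(DISKS, BYTES, ptrs);
	for (int i = 0; i < BYTES; i++)
		printf("byte %d: P=%02x Q=%02x\n", i, p[i], q[i]);
	return 0;
}

The sse2x1, sse2x2 and sse2x4 variants compute exactly this recurrence 16, 32 or 64 bytes per iteration, using non-temporal movntdq stores so the freshly written parity does not displace the data still being streamed through the cache.
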