From 504d7cf10ee42bb76b9556859f23d4121dee0a77 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Feb 2010 17:19:19 -0500 Subject: nmi_watchdog: Compile and portability fixes The original patch was x86_64 centric. Changed the code to make it less so. ested by building and running on a powerpc. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266013161-31197-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 93da9c3f3341..5b41b0feb6db 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); +#if !defined(CONFIG_NMI_WATCHDOG) extern int nmi_watchdog_enabled; +#endif extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); -- cgit From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:44 -0400 Subject: lockup_detector: Combine nmi_watchdog and softlockup detector The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - seperated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 5b41b0feb6db..932f0f86b4b7 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -17,7 +17,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); -#if !defined(CONFIG_NMI_WATCHDOG) +#if !defined(CONFIG_LOCKUP_DETECTOR) extern int nmi_watchdog_enabled; #endif extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); -- cgit From a0c173bd8a3fd0541be8e4ef962170e48d8811c7 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 19 May 2010 12:01:24 -0700 Subject: x86, mrst: add cpu type detection Medfield is the follow-up of Moorestown, it is treated under the same HW sub-architecture. However, we do need to know the CPU type in order for some of the driver to act accordingly. We also have different optimal clock configuration for each CPU type. Signed-off-by: Jacob Pan LKML-Reference: <1274295685-6774-3-git-send-email-jacob.jun.pan@linux.intel.com> Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mrst.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 451d30e7f62d..dc5c8500bfc7 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -11,8 +11,27 @@ #ifndef _ASM_X86_MRST_H #define _ASM_X86_MRST_H extern int pci_mrst_init(void); +extern int mrst_identify_cpu(void); int __init sfi_parse_mrtc(struct sfi_table_header *table); +/* + * Medfield is the follow-up of Moorestown, it combines two chip solution into + * one. Other than that it also added always-on and constant tsc and lapic + * timers. Medfield is the platform name, and the chip name is called Penwell + * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be + * identified via MSRs. + */ +enum mrst_cpu_type { + MRST_CPU_CHIP_LINCROFT = 1, + MRST_CPU_CHIP_PENWELL, +}; + +enum mrst_timer_options { + MRST_TIMER_DEFAULT, + MRST_TIMER_APBT_ONLY, + MRST_TIMER_LAPIC_APBT, +}; + #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -- cgit From a875c01944f0d750eeb1ef3133feceb13f13c4b3 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 19 May 2010 12:01:25 -0700 Subject: x86, mrst: add more timer config options Always-on local APIC timer (ARAT) has been introduced to Medfield, along with the platform APB timers we have more timer configuration options between Moorestown and Medfield. This patch adds run-time detection of avaiable timer features so that we can treat Medfield as a variant of Moorestown and set up the optimal timer options for each platform. i.e. Medfield: per cpu always-on local APIC timer Moorestown: per cpu APB timer Manual override is possible via cmdline option x86_mrst_timer. Signed-off-by: Jacob Pan LKML-Reference: <1274295685-6774-4-git-send-email-jacob.jun.pan@linux.intel.com> Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apb_timer.h | 1 - arch/x86/include/asm/mrst.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h index c74a2eebe570..a69b1ac9eaf8 100644 --- a/arch/x86/include/asm/apb_timer.h +++ b/arch/x86/include/asm/apb_timer.h @@ -55,7 +55,6 @@ extern unsigned long apbt_quick_calibrate(void); extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); extern void apbt_setup_secondary_clock(void); extern unsigned int boot_cpu_id; -extern int disable_apbt_percpu; extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index dc5c8500bfc7..67ad31545778 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -12,6 +12,7 @@ #define _ASM_X86_MRST_H extern int pci_mrst_init(void); extern int mrst_identify_cpu(void); +extern int mrst_timer_options __cpuinitdata; int __init sfi_parse_mrtc(struct sfi_table_header *table); /* -- cgit From a75af580bb1fd261bf63cc00e4b324e17ceb15cf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 19 May 2010 13:40:14 -0700 Subject: x86, mrst: make mrst_identify_cpu() an inline returning enum We have an enum, might as well use it. While we're at it, make it an inline... there is really no point in calling a function for this stuff. LKML-Reference: <1274295685-6774-3-git-send-email-jacob.jun.pan@linux.intel.com> Signed-off-by: H. Peter Anvin Acked-by: Thomas Gleixner Cc: Jacob Pan --- arch/x86/include/asm/mrst.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 67ad31545778..1869c18d15ca 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -11,7 +11,6 @@ #ifndef _ASM_X86_MRST_H #define _ASM_X86_MRST_H extern int pci_mrst_init(void); -extern int mrst_identify_cpu(void); extern int mrst_timer_options __cpuinitdata; int __init sfi_parse_mrtc(struct sfi_table_header *table); @@ -27,6 +26,12 @@ enum mrst_cpu_type { MRST_CPU_CHIP_PENWELL, }; +extern enum mrst_cpu_type __mrst_cpu_chip; +static enum mrst_cpu_type mrst_identify_cpu(void) +{ + return __mrst_cpu_chip; +} + enum mrst_timer_options { MRST_TIMER_DEFAULT, MRST_TIMER_APBT_ONLY, -- cgit From 14671386dcbafb3086bbda3cb6f9f27d34c7bf6d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 19 May 2010 14:37:40 -0700 Subject: x86, mrst: make mrst_timer_options an enum We have an enum mrst_timer_options, use it so that the kernel knows if we're missing something from a switch statement or equivalent. Signed-off-by: H. Peter Anvin LKML-Reference: <1274295685-6774-4-git-send-email-jacob.jun.pan@linux.intel.com> Cc: Thomas Gleixner Cc: Jacob Pan --- arch/x86/include/asm/mrst.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 1869c18d15ca..16350740edf6 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -11,7 +11,6 @@ #ifndef _ASM_X86_MRST_H #define _ASM_X86_MRST_H extern int pci_mrst_init(void); -extern int mrst_timer_options __cpuinitdata; int __init sfi_parse_mrtc(struct sfi_table_header *table); /* @@ -38,6 +37,8 @@ enum mrst_timer_options { MRST_TIMER_LAPIC_APBT, }; +extern enum mrst_timer_options mrst_timer_options; + #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -- cgit From c0011dbfce69467b23b08fb4a64c39a409a935fb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 4 Feb 2010 14:46:34 -0800 Subject: xen: use _PAGE_IOMAP in ioremap to do machine mappings In a Xen domain, ioremap operates on machine addresses, not pseudo-physical addresses. We use _PAGE_IOMAP to determine whether a mapping is intended for machine addresses. [ Impact: allow Xen domain to map real hardware ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 018a0a400799..bf5f7d32bd08 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -112,13 +112,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) */ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) { - extern unsigned long max_mapnr; unsigned long pfn = mfn_to_pfn(mfn); - if ((pfn < max_mapnr) - && !xen_feature(XENFEAT_auto_translated_physmap) - && (get_phys_to_machine(pfn) != mfn)) - return max_mapnr; /* force !pfn_valid() */ - /* XXX fixme; not true with sparsemem */ + if (get_phys_to_machine(pfn) != mfn) + return -1; /* force !pfn_valid() */ return pfn; } -- cgit From e768aee89c687a50e6a2110e30c5cae1fbf0d2da Mon Sep 17 00:00:00 2001 From: Livio Soares Date: Thu, 3 Jun 2010 15:00:31 -0400 Subject: perf, x86: Small fix to cpuid10_edx Fixes to 'cpuid10_edx' to comply with Intel documentation. According to the Intel Manual, Volume 2A, Table 3-12, the cpuid for architecture performance monitoring returns, in EDX, two pieces of information: 1) Number of fixed-function counters (5 bits, not 4) 2) Width of fixed-function counters (8 bits) Signed-off-by: Livio Soares Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arjan van de Ven Cc: "H. Peter Anvin" LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 254883d0c7e0..6ed3ae4f5482 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -68,8 +68,9 @@ union cpuid10_eax { union cpuid10_edx { struct { - unsigned int num_counters_fixed:4; - unsigned int reserved:28; + unsigned int num_counters_fixed:5; + unsigned int bit_width_fixed:8; + unsigned int reserved:19; } split; unsigned int full; }; -- cgit From 12a6611fa16e9c6d2f844fe2175d219c6e9bd95d Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:01 -0500 Subject: x86, UV: Calculate BAU destination timeout Calculate the Broadcast Assist Unit's destination timeout period from the values in the relevant MMR's. Store it in each cpu's per-cpu BAU structure so that a destination timeout can be differentiated from a 'plugged' situation in which all software ack resources are already allocated and a timeout is pending. That case returns an immediate destination error. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index aa558ac0306e..458e04c626a2 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -49,6 +49,18 @@ #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL +/* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ +#define BAU_MISC_CONTROL_MULT_MASK 3 + +#define UVH_AGING_PRESCALE_SEL 0x000000b000UL +/* [30:28] URGENCY_7 an index into a table of times */ +#define BAU_URGENCY_7_SHIFT 28 +#define BAU_URGENCY_7_MASK 7 + +#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL +/* [45:40] BAU - BAU transaction timeout select - a multiplier */ +#define BAU_TRANS_SHIFT 40 +#define BAU_TRANS_MASK 0x3f /* * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 -- cgit From e8e5e8a8048006a12d7777a93baebd6e39496101 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:01 -0500 Subject: x86, UV: BAU tunables into a debugfs file Make the Broadcast Assist Unit driver's nine tuning values variable by making them accessible through a read/write debugfs file. The file will normally be mounted as /sys/kernel/debug/sgi_uv/bau_tunables. The tunables are kept in each cpu's per-cpu BAU structure. The patch also does a little name improvement, and corrects the reset of two destination timeout counters. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 458e04c626a2..e5543c1a80ca 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -45,10 +45,14 @@ #define UV_DESC_BASE_PNODE_SHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" +#define UV_BAU_BASENAME "sgi_uv/bau_tunables" +#define UV_BAU_TUNABLES_DIR "sgi_uv" +#define UV_BAU_TUNABLES_FILE "bau_tunables" +#define WHITESPACE " \t\n" #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL +#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x0000000009UL /* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ #define BAU_MISC_CONTROL_MULT_MASK 3 @@ -70,25 +74,23 @@ #define DESC_STATUS_DESTINATION_TIMEOUT 2 #define DESC_STATUS_SOURCE_TIMEOUT 3 +#define TIMEOUT_DELAY 10 /* - * source side threshholds at which message retries print a warning - */ -#define SOURCE_TIMEOUT_LIMIT 20 -#define DESTINATION_TIMEOUT_LIMIT 20 - -/* - * misc. delays, in microseconds + * delay for 'plugged' timeout retries, in microseconds */ -#define THROTTLE_DELAY 10 -#define TIMEOUT_DELAY 10 -#define BIOS_TO 1000 -/* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */ +#define PLUGGED_DELAY 10 /* * threshholds at which to use IPI to free resources */ +/* after this # consecutive 'plugged' timeouts, use IPI to release resources */ #define PLUGSB4RESET 100 -#define TIMEOUTSB4RESET 100 +/* after this many consecutive timeouts, use IPI to release resources */ +#define TIMEOUTSB4RESET 1 +/* at this number uses of IPI to release resources, giveup the request */ +#define IPI_RESET_LIMIT 1 +/* after this # consecutive successes, bump up the throttle if it was lowered */ +#define COMPLETE_THRESHOLD 5 /* * number of entries in the destination side payload queue @@ -107,6 +109,13 @@ #define FLUSH_GIVEUP 3 #define FLUSH_COMPLETE 4 +/* + * tuning the action when the numalink network is extremely delayed + */ +#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in microseconds */ +#define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */ +#define CONGESTED_PERIOD 30 /* time for the bau to be disabled, in seconds */ + /* * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) * If the 'multilevel' flag in the header portion of the descriptor @@ -323,14 +332,13 @@ struct bau_control { struct bau_control *uvhub_master; struct bau_control *socket_master; unsigned long timeout_interval; + unsigned long set_bau_on_time; atomic_t active_descriptor_count; - int max_concurrent; - int max_concurrent_constant; - int retry_message_scans; int plugged_tries; int timeout_tries; int ipi_attempts; int conseccompletes; + int set_bau_off; short cpu; short uvhub_cpu; short uvhub; @@ -343,6 +351,19 @@ struct bau_control { spinlock_t masks_lock; spinlock_t uvhub_lock; spinlock_t queue_lock; + /* tunables */ + int max_bau_concurrent; + int max_bau_concurrent_constant; + int plugged_delay; + int plugsb4reset; + int timeoutsb4reset; + int ipi_reset_limit; + int complete_threshold; + int congested_response_us; + int congested_reps; + int congested_period; + cycles_t period_time; + long period_requests; }; /* -- cgit From 50fb55acc5bbe5ee29d0a65262f4ec286b14d156 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:02 -0500 Subject: x86, UV: Disable BAU on network congestion The numalink network can become so congested that TLB shootdown using the Broadcast Assist Unit becomes slower than using IPI's. In that case, disable the use of the BAU for a period of time. The period is tunable. When the period expires the use of the BAU is re-enabled. A count of these actions is added to the statistics file. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index e5543c1a80ca..9b3e750ef2d8 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -34,6 +34,7 @@ */ #define UV_ITEMS_PER_DESCRIPTOR 8 +/* the 'throttle' to prevent the hardware stay-busy bug */ #define MAX_BAU_CONCURRENT 3 #define UV_CPUS_PER_ACT_STATUS 32 #define UV_ACT_STATUS_MASK 0x3 @@ -338,6 +339,7 @@ struct bau_control { int timeout_tries; int ipi_attempts; int conseccompletes; + int baudisabled; int set_bau_off; short cpu; short uvhub_cpu; @@ -389,6 +391,8 @@ struct ptc_stats { unsigned long s_busy; /* status stayed busy past s/w timer */ unsigned long s_throttles; /* waits in throttle */ unsigned long s_retry_messages; /* retry broadcasts */ + unsigned long s_bau_reenabled; /* for bau enable/disable */ + unsigned long s_bau_disabled; /* for bau enable/disable */ /* destination statistics */ unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ -- cgit From 712157aa703a01f58c7c17452096ab00b774d0a9 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:02 -0500 Subject: x86, UV: Shorten access to BAU statistics structure Use a pointer from the per-cpu BAU control structure to the per-cpu BAU statistics structure. We nearly always know the first before needing the second. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 9b3e750ef2d8..6a42d42eb8f9 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -332,6 +332,7 @@ struct bau_control { struct bau_payload_queue_entry *bau_msg_head; struct bau_control *uvhub_master; struct bau_control *socket_master; + struct ptc_stats *statp; unsigned long timeout_interval; unsigned long set_bau_on_time; atomic_t active_descriptor_count; -- cgit From 4faca1550838708d71f6eea14cdacb0876c3a5a4 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:02 -0500 Subject: x86, UV: BAU structure rearranging Move some structure definitions from the C code to the BAU header file, and change the organization of that header file a little. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 90 +++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 39 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 6a42d42eb8f9..1c8f1e9bf74f 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -322,6 +322,57 @@ struct bau_payload_queue_entry { /* bytes 24-31 */ }; +struct msg_desc { + struct bau_payload_queue_entry *msg; + int msg_slot; + int sw_ack_slot; + struct bau_payload_queue_entry *va_queue_first; + struct bau_payload_queue_entry *va_queue_last; +}; + +struct reset_args { + int sender; +}; + +/* + * This structure is allocated per_cpu for UV TLB shootdown statistics. + */ +struct ptc_stats { + /* sender statistics */ + unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ + unsigned long s_requestor; /* number of shootdown requests */ + unsigned long s_stimeout; /* source side timeouts */ + unsigned long s_dtimeout; /* destination side timeouts */ + unsigned long s_time; /* time spent in sending side */ + unsigned long s_retriesok; /* successful retries */ + unsigned long s_ntargcpu; /* total number of cpu's targeted */ + unsigned long s_ntarguvhub; /* total number of uvhubs targeted */ + unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/ + unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */ + unsigned long s_ntarguvhub4; /* number of times target hubs >= 4 */ + unsigned long s_ntarguvhub2; /* number of times target hubs >= 2 */ + unsigned long s_ntarguvhub1; /* number of times target hubs == 1 */ + unsigned long s_resets_plug; /* ipi-style resets from plug state */ + unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ + unsigned long s_busy; /* status stayed busy past s/w timer */ + unsigned long s_throttles; /* waits in throttle */ + unsigned long s_retry_messages; /* retry broadcasts */ + unsigned long s_bau_reenabled; /* for bau enable/disable */ + unsigned long s_bau_disabled; /* for bau enable/disable */ + /* destination statistics */ + unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ + unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ + unsigned long d_multmsg; /* interrupts with multiple messages */ + unsigned long d_nomsg; /* interrupts with no message */ + unsigned long d_time; /* time spent on destination side */ + unsigned long d_requestee; /* number of messages processed */ + unsigned long d_retries; /* number of retry messages processed */ + unsigned long d_canceled; /* number of messages canceled by retries */ + unsigned long d_nocanceled; /* retries that found nothing to cancel */ + unsigned long d_resets; /* number of ipi-style requests processed */ + unsigned long d_rcanceled; /* number of messages canceled by resets */ +}; + /* * one per-cpu; to locate the software tables */ @@ -369,45 +420,6 @@ struct bau_control { long period_requests; }; -/* - * This structure is allocated per_cpu for UV TLB shootdown statistics. - */ -struct ptc_stats { - /* sender statistics */ - unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ - unsigned long s_requestor; /* number of shootdown requests */ - unsigned long s_stimeout; /* source side timeouts */ - unsigned long s_dtimeout; /* destination side timeouts */ - unsigned long s_time; /* time spent in sending side */ - unsigned long s_retriesok; /* successful retries */ - unsigned long s_ntargcpu; /* number of cpus targeted */ - unsigned long s_ntarguvhub; /* number of uvhubs targeted */ - unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */ - unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */ - unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */ - unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */ - unsigned long s_ntarguvhub1; /* number of times == 1 target hub */ - unsigned long s_resets_plug; /* ipi-style resets from plug state */ - unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ - unsigned long s_busy; /* status stayed busy past s/w timer */ - unsigned long s_throttles; /* waits in throttle */ - unsigned long s_retry_messages; /* retry broadcasts */ - unsigned long s_bau_reenabled; /* for bau enable/disable */ - unsigned long s_bau_disabled; /* for bau enable/disable */ - /* destination statistics */ - unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ - unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ - unsigned long d_multmsg; /* interrupts with multiple messages */ - unsigned long d_nomsg; /* interrupts with no message */ - unsigned long d_time; /* time spent on destination side */ - unsigned long d_requestee; /* number of messages processed */ - unsigned long d_retries; /* number of retry messages processed */ - unsigned long d_canceled; /* number of messages canceled by retries */ - unsigned long d_nocanceled; /* retries that found nothing to cancel */ - unsigned long d_resets; /* number of ipi-style requests processed */ - unsigned long d_rcanceled; /* number of messages canceled by resets */ -}; - static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) { return constant_test_bit(uvhub, &dstp->bits[0]); -- cgit From 90cc7d944981a6d06b49bb26fde1b490e28c90e5 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:02 -0500 Subject: x86, UV: Remove BAU check for stay-busy Remove a faulty assumption that a long running BAU request has encountered a hardware problem and will never finish. Numalink congestion can make a request appear to have encountered such a problem, but it is not safe to cancel the request. If such a cancel is done but a reply is later received we can miss a TLB shootdown. We depend upon the max_bau_concurrent 'throttle' to prevent the stay-busy case from happening. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 1c8f1e9bf74f..c19b870ea58a 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -402,7 +402,6 @@ struct bau_control { unsigned short uvhub_quiesce; short socket_acknowledge_count[DEST_Q_SIZE]; cycles_t send_message; - spinlock_t masks_lock; spinlock_t uvhub_lock; spinlock_t queue_lock; /* tunables */ -- cgit From 450a007eebaf430426ea8f89bbc3f287949905b2 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:02 -0500 Subject: x86, UV: BAU broadcast to the local hub Make the Broadcast Assist Unit driver use the BAU for TLB shootdowns of cpu's on the local uvhub. It was previously thought that IPI might be faster to the cpu's on the local hub. But the IPI operation would have to follow the completion of the BAU broadcast anyway. So we broadcast to the local uvhub in all cases except when the current cpu was the only local cpu in the mask. This simplifies uv_flush_send_and_wait() in that it returns either all shootdowns complete, or none. Adjust the statistics to account for shootdowns on the local uvhub. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index c19b870ea58a..7f6ea611cb71 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -346,6 +346,11 @@ struct ptc_stats { unsigned long s_time; /* time spent in sending side */ unsigned long s_retriesok; /* successful retries */ unsigned long s_ntargcpu; /* total number of cpu's targeted */ + unsigned long s_ntargself; /* times the sending cpu was targeted */ + unsigned long s_ntarglocals; /* targets of cpus on the local blade */ + unsigned long s_ntargremotes; /* targets of cpus on remote blades */ + unsigned long s_ntarglocaluvhub; /* targets of the local hub */ + unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ unsigned long s_ntarguvhub; /* total number of uvhubs targeted */ unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/ unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */ -- cgit From f6d8a56693426b1f29ff5cafda8be0d65e4e1870 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 2 Jun 2010 16:22:02 -0500 Subject: x86, UV: Modularize BAU send and wait Streamline the large uv_flush_send_and_wait() function by use of a couple of helper functions. And remove some excess comments. Signed-off-by: Cliff Wickman Cc: gregkh@suse.de LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 7f6ea611cb71..42d412fd8b02 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -75,7 +75,6 @@ #define DESC_STATUS_DESTINATION_TIMEOUT 2 #define DESC_STATUS_SOURCE_TIMEOUT 3 -#define TIMEOUT_DELAY 10 /* * delay for 'plugged' timeout retries, in microseconds */ -- cgit From c9cf4dbb4d9ca715d8fedf13301a53296429abc6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 19 May 2010 21:35:17 +0200 Subject: x86: Unify dumpstack.h and stacktrace.h arch/x86/include/asm/stacktrace.h and arch/x86/kernel/dumpstack.h declare headers of objects that deal with the same topic. Actually most of the files that include stacktrace.h also include dumpstack.h Although dumpstack.h seems more reserved for internals of stack traces, those are quite often needed to define specialized stack trace operations. And perf event arch headers are going to need access to such low level operations anyway. So don't continue to bother with dumpstack.h as it's not anymore about isolated deep internals. v2: fix struct stack_frame definition conflict in sysprof Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: Soeren Sandmann --- arch/x86/include/asm/stacktrace.h | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 4dab78edbad9..a957463d3c7a 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -1,6 +1,13 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ + #ifndef _ASM_X86_STACKTRACE_H #define _ASM_X86_STACKTRACE_H +#include + extern int kstack_depth_to_print; struct thread_info; @@ -42,4 +49,49 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data); +#ifdef CONFIG_X86_32 +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) +#else +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) +#endif + +extern void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl); + +extern void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl); + +extern unsigned int code_bytes; + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +struct stack_frame_ia32 { + u32 next_frame; + u32 return_address; +}; + +static inline unsigned long rewind_frame_pointer(int n) +{ + struct stack_frame *frame; + + get_bp(frame); + +#ifdef CONFIG_FRAME_POINTER + while (n--) { + if (probe_kernel_address(&frame->next_frame, frame)) + break; + } +#endif + + return (unsigned long)frame; +} + #endif /* _ASM_X86_STACKTRACE_H */ -- cgit From b0f82b81fe6bbcf78d478071f33e44554726bc81 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 20 May 2010 07:47:21 +0200 Subject: perf: Drop the skip argument from perf_arch_fetch_regs_caller Drop this argument now that we always want to rewind only to the state of the first caller. It means frame pointers are not necessary anymore to reliably get the source of an event. But this also means we need this helper to be a macro now, as an inline function is not an option since we need to know when to provide a default implentation. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul Mackerras Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo --- arch/x86/include/asm/perf_event.h | 13 +++++++++++++ arch/x86/include/asm/stacktrace.h | 7 ++----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 254883d0c7e0..02de29830ffe 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -140,6 +140,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#include + +/* + * We abuse bit 3 from flags to pass exact information, see perf_misc_flags + * and the comment with PERF_EFLAGS_EXACT. + */ +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ip = (__ip); \ + (regs)->bp = caller_frame_pointer(); \ + (regs)->cs = __KERNEL_CS; \ + regs->flags = 0; \ +} + #else static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index a957463d3c7a..2b16a2ad23dc 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -78,17 +78,14 @@ struct stack_frame_ia32 { u32 return_address; }; -static inline unsigned long rewind_frame_pointer(int n) +static inline unsigned long caller_frame_pointer(void) { struct stack_frame *frame; get_bp(frame); #ifdef CONFIG_FRAME_POINTER - while (n--) { - if (probe_kernel_address(&frame->next_frame, frame)) - break; - } + frame = frame->next_frame; #endif return (unsigned long)frame; -- cgit From 1996bda2a42480c275656233e631ee0966574be4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 May 2010 14:05:13 +0200 Subject: arch: Implement local64_t On 64bit, local_t is of size long, and thus we make local64_t an alias. On 32bit, we fall back to atomic64_t. (architecture can provide optimized 32-bit version) (This new facility is to be used by perf events optimizations.) Signed-off-by: Peter Zijlstra Cc: linux-arch@vger.kernel.org Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/local64.h | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/x86/include/asm/local64.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/x86/include/asm/local64.h @@ -0,0 +1 @@ +#include -- cgit From 1f9a0bd4989fd16842ad71fc89240b48ab191446 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 8 Jun 2010 14:09:08 +0800 Subject: x86, mce: Rename MSR_IA32_MCx_CTL2 value Rename CMCI_EN to MCI_CTL2_CMCI_EN and CMCI_THRESHOLD_MASK to MCI_CTL2_CMCI_THRESHOLD_MASK to make naming consistent. Signed-off-by: Huang Ying LKML-Reference: <1275977348.3444.659.camel@yhuang-dev.sh.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 4 ++++ arch/x86/include/asm/msr-index.h | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f32a4301c4d4..82db1d8f064b 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -38,6 +38,10 @@ #define MCM_ADDR_MEM 3 /* memory address */ #define MCM_ADDR_GENERIC 7 /* generic */ +/* CTL2 register defines */ +#define MCI_CTL2_CMCI_EN (1ULL << 30) +#define MCI_CTL2_CMCI_THRESHOLD_MASK 0xffffULL + #define MCJ_CTX_MASK 3 #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) #define MCJ_CTX_RANDOM 0 /* inject context: random */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b49d8ca228f6..38f66eb58541 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -94,9 +94,6 @@ #define MSR_IA32_MC0_CTL2 0x00000280 #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) -#define CMCI_EN (1ULL << 30) -#define CMCI_THRESHOLD_MASK 0xffffULL - #define MSR_P6_PERFCTR0 0x000000c1 #define MSR_P6_PERFCTR1 0x000000c2 #define MSR_P6_EVNTSEL0 0x00000186 -- cgit From 3c417588603e5411f29d22a40f3b5ff71529a4f0 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 8 Jun 2010 14:09:10 +0800 Subject: x86, mce: Fix MSR_IA32_MCI_CTL2 CMCI threshold setup It is reported that CMCI is not raised when number of corrected error reaches preset threshold. After inspection, it is found that MSR_IA32_MCI_CTL2 threshold field is not setup properly. This patch fixed it. Value of MCI_CTL2_CMCI_THRESHOLD_MASK is fixed according to x86_64 Software Developer's Manual too. Reported-by: Shaohui Zheng Signed-off-by: Huang Ying LKML-Reference: <1275977350.3444.660.camel@yhuang-dev.sh.intel.com> Reviewed-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 82db1d8f064b..c62c13cb9788 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -40,7 +40,7 @@ /* CTL2 register defines */ #define MCI_CTL2_CMCI_EN (1ULL << 30) -#define MCI_CTL2_CMCI_THRESHOLD_MASK 0xffffULL +#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL #define MCJ_CTX_MASK 3 #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) -- cgit From 23016bf0d25d62c45d8b8f61d55b290d704f7a79 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Thu, 3 Jun 2010 23:22:28 -0400 Subject: x86: Look for IA32_ENERGY_PERF_BIAS support The new IA32_ENERGY_PERF_BIAS MSR allows system software to give hardware a hint whether OS policy favors more power saving, or more performance. This allows the OS to have some influence on internal hardware power/performance tradeoffs where the OS has previously had no influence. The support for this feature is indicated by CPUID.06H.ECX.bit3, as documented in the Intel Architectures Software Developer's Manual. This patch discovers support of this feature and displays it as "epb" in /proc/cpuinfo. Signed-off-by: Venkatesh Pallipadi LKML-Reference: Signed-off-by: Len Brown Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/msr-index.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 468145914389..2a904f4071fe 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -162,6 +162,7 @@ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ +#define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ /* Virtualization flags: Linux defined */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b49d8ca228f6..e57bc20683d6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -238,6 +238,8 @@ #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 + /* MISC_ENABLE bits: architectural */ #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) -- cgit From fd699c76552bbfa66631f019be415a87dbb08237 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Fri, 18 Jun 2010 17:46:53 -0400 Subject: x86, olpc: Add support for calling into OpenFirmware Add support for saving OFW's cif, and later calling into it to run OFW commands. OFW remains resident in memory, living within virtual range 0xff800000 - 0xffc00000. A single page directory entry points to the pgdir that OFW actually uses, so rather than saving the entire page table, we grab and install that one entry permanently in the kernel's page table. This is currently only used by the OLPC XO. Note that this particular calling convention breaks PAE and PAT, and so cannot be used on newer x86 hardware. Signed-off-by: Andres Salomon LKML-Reference: <20100618174653.7755a39a@dev.queued.net> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bootparam.h | 11 ++++++++++- arch/x86/include/asm/olpc_ofw.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/olpc_ofw.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index 6be33d83c716..8e6218550e77 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h @@ -70,6 +70,14 @@ struct sys_desc_table { __u8 table[14]; }; +/* Gleaned from OFW's set-parameters in cpu/x86/pc/linux.fth */ +struct olpc_ofw_header { + __u32 ofw_magic; /* OFW signature */ + __u32 ofw_version; + __u32 cif_handler; /* callback into OFW */ + __u32 irq_desc_table; +} __attribute__((packed)); + struct efi_info { __u32 efi_loader_signature; __u32 efi_systab; @@ -92,7 +100,8 @@ struct boot_params { __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ struct sys_desc_table sys_desc_table; /* 0x0a0 */ - __u8 _pad4[144]; /* 0x0b0 */ + struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */ + __u8 _pad4[128]; /* 0x0c0 */ struct edid_info edid_info; /* 0x140 */ struct efi_info efi_info; /* 0x1c0 */ __u32 alt_mem_k; /* 0x1e0 */ diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h new file mode 100644 index 000000000000..3e63d857c48f --- /dev/null +++ b/arch/x86/include/asm/olpc_ofw.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_OLPC_OFW_H +#define _ASM_X86_OLPC_OFW_H + +/* index into the page table containing the entry OFW occupies */ +#define OLPC_OFW_PDE_NR 1022 + +#define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */ + +#ifdef CONFIG_OLPC_OPENFIRMWARE + +/* run an OFW command by calling into the firmware */ +#define olpc_ofw(name, args, res) \ + __olpc_ofw((name), ARRAY_SIZE(args), args, ARRAY_SIZE(res), res) + +extern int __olpc_ofw(const char *name, int nr_args, void **args, int nr_res, + void **res); + +/* determine whether OFW is available and lives in the proper memory */ +extern void olpc_ofw_detect(void); + +/* install OFW's pde permanently into the kernel's pgtable */ +extern void setup_olpc_ofw_pgd(void); + +#else /* !CONFIG_OLPC_OPENFIRMWARE */ + +static inline void olpc_ofw_detect(void) { } +static inline void setup_olpc_ofw_pgd(void) { } + +#endif /* !CONFIG_OLPC_OPENFIRMWARE */ + +#endif /* _ASM_X86_OLPC_OFW_H */ -- cgit From f7809daf64bf119fef70af172db6a0636fa51f92 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Jun 2010 10:00:24 +0200 Subject: x86: Support for instruction breakpoints Instruction breakpoints need to have a specific length of 0 to be working. Bring this support but also take care the user is not trying to set an unsupported length, like a range breakpoint for example. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad Cc: Mahesh Salgaonkar Cc: Will Deacon Cc: Jason Wessel --- arch/x86/include/asm/hw_breakpoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 942255310e6a..528a11e8d3e3 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,10 +20,10 @@ struct arch_hw_breakpoint { #include /* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_X 0x00 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c -#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 #define X86_BREAKPOINT_LEN_8 0x48 -- cgit From ea812ca1b06113597adcd8e70c0f84a413d97544 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 29 Jun 2010 18:38:00 +0000 Subject: x86: Align skb w/ start of cacheline on newer core 2/Xeon Arch x86 architectures can handle unaligned accesses in hardware, and it has been shown that unaligned DMA accesses can be expensive on Nehalem architectures. As such we should overwrite NET_IP_ALIGN to resolve this issue. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Acked-by: H. Peter Anvin Signed-off-by: David S. Miller --- arch/x86/include/asm/system.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index b8fe48ee2ed9..b4293fc8b798 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -457,4 +457,13 @@ static inline void rdtsc_barrier(void) alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); } +#ifdef CONFIG_MCORE2 +/* + * We handle most unaligned accesses in hardware. On the other hand + * unaligned DMA can be quite expensive on some Nehalem processors. + * + * Based on this we disable the IP header alignment in network drivers. + */ +#define NET_IP_ALIGN 0 +#endif #endif /* _ASM_X86_SYSTEM_H */ -- cgit From 7475271004b66e9c22e1bb28f240a38c5d6fe76e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 1 Jul 2010 13:28:27 +0000 Subject: x86: Drop CONFIG_MCORE2 check around setting of NET_IP_ALIGN This patch removes the CONFIG_MCORE2 check from around NET_IP_ALIGN. It is based on a suggestion from Andi Kleen. The assumption is that there are not any x86 cores where unaligned access is really slow, and this change would allow for a performance improvement to still exist on configurations that are not necessarily optimized for Core 2. Cc: Andi Kleen Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Acked-by: H. Peter Anvin Signed-off-by: David S. Miller --- arch/x86/include/asm/system.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index b4293fc8b798..1db9bd2281d7 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -457,7 +457,6 @@ static inline void rdtsc_barrier(void) alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); } -#ifdef CONFIG_MCORE2 /* * We handle most unaligned accesses in hardware. On the other hand * unaligned DMA can be quite expensive on some Nehalem processors. @@ -465,5 +464,4 @@ static inline void rdtsc_barrier(void) * Based on this we disable the IP header alignment in network drivers. */ #define NET_IP_ALIGN 0 -#endif #endif /* _ASM_X86_SYSTEM_H */ -- cgit From 39ef13a4ac28aa64cfe1bc36e6e00f1096707a28 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 5 Jul 2010 10:09:29 +0800 Subject: perf, x86: P4 PMU -- redesign cache events To support cache events we have reserved the low 6 bits in hw_perf_event::config (which is a part of CCCR register configuration actually). These bits represent Replay Event mertic enumerated in enum P4_PEBS_METRIC. The caller should not care about which exact bits should be set and how -- the caller just chooses one P4_PEBS_METRIC entity and puts it into the config. The kernel will track it and set appropriate additional MSR registers (metrics) when needed. The reason for this redesign was the PEBS enable bit, which should not be set until DS (and PEBS sampling) support will be implemented properly. TODO ==== - PEBS sampling (note it's tricky and works with _one_ counter only so for HT machines it will be not that easy to handle both threads) - tracking of PEBS registers state, a user might need to turn PEBS off completely (ie no PEBS enable, no UOP_tag) but some other event may need it, such events clashes and should not run simultaneously, at moment we just don't support such events - eventually export user space bits in separate header which will allow user apps to configure raw events more conveniently. Signed-off-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Stephane Eranian Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <1278295769.9540.15.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 99 +++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 47 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 64a8ebff06fc..def500776b16 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -19,7 +19,6 @@ #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) #define ARCH_P4_MAX_CCCR (18) -#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) #define P4_ESCR_EVENT_MASK 0x7e000000U #define P4_ESCR_EVENT_SHIFT 25 @@ -71,10 +70,6 @@ #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) -/* Custom bits in reerved CCCR area */ -#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU - - /* Non HT mask */ #define P4_CCCR_MASK \ (P4_CCCR_OVF | \ @@ -106,8 +101,7 @@ * ESCR and CCCR but rather an only packed value should * be unpacked and written to a proper addresses * - * the base idea is to pack as much info as - * possible + * the base idea is to pack as much info as possible */ #define p4_config_pack_escr(v) (((u64)(v)) << 32) #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) @@ -130,8 +124,6 @@ t; \ }) -#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) - #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) @@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) return escr; } +/* + * This are the events which should be used in "Event Select" + * field of ESCR register, they are like unique keys which allow + * the kernel to determinate which CCCR and COUNTER should be + * used to track an event + */ enum P4_EVENTS { P4_EVENT_TC_DELIVER_MODE, P4_EVENT_BPU_FETCH_REQUEST, @@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES { * a caller should use P4_ESCR_EMASK_NAME helper to * pick the EventMask needed, for example * - * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) + * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) */ enum P4_ESCR_EMASKS { P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), @@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS { P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), }; -/* P4 PEBS: stale for a while */ -#define P4_PEBS_METRIC_MASK 0x00001fffU -#define P4_PEBS_UOB_TAG 0x01000000U -#define P4_PEBS_ENABLE 0x02000000U - -/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ -#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 -#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 -#define P4_PEBS__dtlb_load_miss_retired 0x3000004 -#define P4_PEBS__dtlb_store_miss_retired 0x3000004 -#define P4_PEBS__dtlb_all_miss_retired 0x3000004 -#define P4_PEBS__tagged_mispred_branch 0x3018000 -#define P4_PEBS__mob_load_replay_retired 0x3000200 -#define P4_PEBS__split_load_retired 0x3000400 -#define P4_PEBS__split_store_retired 0x3000400 - -#define P4_VERT__1stl_cache_load_miss_retired 0x0000001 -#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 -#define P4_VERT__dtlb_load_miss_retired 0x0000001 -#define P4_VERT__dtlb_store_miss_retired 0x0000002 -#define P4_VERT__dtlb_all_miss_retired 0x0000003 -#define P4_VERT__tagged_mispred_branch 0x0000010 -#define P4_VERT__mob_load_replay_retired 0x0000001 -#define P4_VERT__split_load_retired 0x0000001 -#define P4_VERT__split_store_retired 0x0000002 - -enum P4_CACHE_EVENTS { - P4_CACHE__NONE, - - P4_CACHE__1stl_cache_load_miss_retired, - P4_CACHE__2ndl_cache_load_miss_retired, - P4_CACHE__dtlb_load_miss_retired, - P4_CACHE__dtlb_store_miss_retired, - P4_CACHE__itlb_reference_hit, - P4_CACHE__itlb_reference_miss, - - P4_CACHE__MAX +/* + * P4 PEBS specifics (Replay Event only) + * + * Format (bits): + * 0-6: metric from P4_PEBS_METRIC enum + * 7 : reserved + * 8 : reserved + * 9-11 : reserved + * + * Note we have UOP and PEBS bits reserved for now + * just in case if we will need them once + */ +#define P4_PEBS_CONFIG_ENABLE (1 << 7) +#define P4_PEBS_CONFIG_UOP_TAG (1 << 8) +#define P4_PEBS_CONFIG_METRIC_MASK 0x3f +#define P4_PEBS_CONFIG_MASK 0xff + +/* + * mem: Only counters MSR_IQ_COUNTER4 (16) and + * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling + */ +#define P4_PEBS_ENABLE 0x02000000U +#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U + +#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK) +#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK) + +#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask)) + +enum P4_PEBS_METRIC { + P4_PEBS_METRIC__none, + + P4_PEBS_METRIC__1stl_cache_load_miss_retired, + P4_PEBS_METRIC__2ndl_cache_load_miss_retired, + P4_PEBS_METRIC__dtlb_load_miss_retired, + P4_PEBS_METRIC__dtlb_store_miss_retired, + P4_PEBS_METRIC__dtlb_all_miss_retired, + P4_PEBS_METRIC__tagged_mispred_branch, + P4_PEBS_METRIC__mob_load_replay_retired, + P4_PEBS_METRIC__split_load_retired, + P4_PEBS_METRIC__split_store_retired, + + P4_PEBS_METRIC__max }; #endif /* PERF_EVENT_P4_H */ + -- cgit From 8e221b6db4477643fefc885a97ea9889ac733140 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 22 Jun 2010 16:23:37 -0700 Subject: x86: Avoid unnecessary __clear_user() and xrstor in signal handling fxsave/xsave doesn't touch all the bytes in the memory layout used by these instructions. Specifically SW reserved (bytes 464..511) fields in the fxsave frame and the reserved fields in the xsave header. To present a clean context for the signal handling, just clear these fields instead of clearing the complete fxsave/xsave memory layout, when we dump these registers directly to the user signal frame. Also avoid the call to second xrstor (which inits the state not passed in the signal frame) in restore_user_xstate() if all the state has already been restored by the first xrstor. These changes improve the performance of signal handling(by ~3-5% as measured by the lat_sig). Signed-off-by: Suresh Siddha LKML-Reference: <1277249017.2847.85.camel@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 9 +++++++++ arch/x86/include/asm/xsave.h | 10 ++++++++++ 2 files changed, 19 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b904..0f1cf5d53dd8 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -127,6 +127,15 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; + /* + * Clear the bytes not touched by the fxsave and reserved + * for the SW usage. + */ + err = __clear_user(&fx->sw_reserved, + sizeof(struct _fpx_sw_bytes)); + if (unlikely(err)) + return -EFAULT; + asm volatile("1: rex64/fxsave (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae228..30dfc81804d5 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -59,6 +59,16 @@ static inline int fpu_xrstor_checking(struct fpu *fpu) static inline int xsave_user(struct xsave_struct __user *buf) { int err; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + err = __clear_user(&buf->xsave_hdr, + sizeof(struct xsave_hdr_struct)); + if (unlikely(err)) + return -EFAULT; + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" "2:\n" ".section .fixup,\"ax\"\n" -- cgit From 24da9c26f3050aee9314ec09930a24c80fe76352 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 7 Jul 2010 10:15:12 -0700 Subject: x86, cpu: Add CPU flags for F16C and RDRND Add support for the newly documented F16C (16-bit floating point conversions) and RDRND (RDRAND instruction) CPU feature flags. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 2a904f4071fe..aeb6f3f9b2ca 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -124,6 +124,8 @@ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ +#define X86_FEATURE_RDRND (4*32+30) /* The RDRAND instruction */ #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -- cgit From 83a7a2ad2a9173dcabc05df0f01d1d85b7ba1c2c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 10 Jun 2010 00:10:43 +0000 Subject: x86, alternatives: Use 16-bit numbers for cpufeature index We already have cpufeature indicies above 255, so use a 16-bit number for the alternatives index. This consumes a padding field and so doesn't add any size, but it means that abusing the padding field to create assembly errors on overflow no longer works. We can retain the test simply by redirecting it to the .discard section, however. [ v3: updated to include open-coded locations ] Signed-off-by: H. Peter Anvin LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 7 ++++--- arch/x86/include/asm/cpufeature.h | 14 ++++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 03b6bb5394a0..bc6abb7bc7ee 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -45,10 +45,9 @@ struct alt_instr { u8 *instr; /* original instruction */ u8 *replacement; - u8 cpuid; /* cpuid bit set for replacement */ + u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction, <= instrlen */ - u8 pad1; #ifdef CONFIG_X86_64 u32 pad2; #endif @@ -86,9 +85,11 @@ static inline int alternatives_text_reserved(void *start, void *end) _ASM_ALIGN "\n" \ _ASM_PTR "661b\n" /* label */ \ _ASM_PTR "663f\n" /* new instruction */ \ - " .byte " __stringify(feature) "\n" /* feature bit */ \ + " .word " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .discard,\"aw\",@progbits\n" \ " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ ".section .altinstr_replacement, \"ax\"\n" \ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 468145914389..e8b88967de35 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -291,7 +291,7 @@ extern const char * const x86_power_flags[32]; * patch the target code for additional performance. * */ -static __always_inline __pure bool __static_cpu_has(u8 bit) +static __always_inline __pure bool __static_cpu_has(u16 bit) { #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) asm goto("1: jmp %l[t_no]\n" @@ -300,11 +300,11 @@ static __always_inline __pure bool __static_cpu_has(u8 bit) _ASM_ALIGN "\n" _ASM_PTR "1b\n" _ASM_PTR "0\n" /* no replacement */ - " .byte %P0\n" /* feature bit */ + " .word %P0\n" /* feature bit */ " .byte 2b - 1b\n" /* source len */ " .byte 0\n" /* replacement len */ - " .byte 0xff + 0 - (2b-1b)\n" /* padding */ ".previous\n" + /* skipping size check since replacement size = 0 */ : : "i" (bit) : : t_no); return true; t_no: @@ -318,10 +318,12 @@ static __always_inline __pure bool __static_cpu_has(u8 bit) _ASM_ALIGN "\n" _ASM_PTR "1b\n" _ASM_PTR "3f\n" - " .byte %P1\n" /* feature bit */ + " .word %P1\n" /* feature bit */ " .byte 2b - 1b\n" /* source len */ " .byte 4f - 3f\n" /* replacement len */ - " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */ + ".previous\n" + ".section .discard,\"aw\",@progbits\n" + " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ ".previous\n" ".section .altinstr_replacement,\"ax\"\n" "3: movb $1,%0\n" @@ -337,7 +339,7 @@ static __always_inline __pure bool __static_cpu_has(u8 bit) ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ boot_cpu_has(bit) : \ - (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \ + __builtin_constant_p(bit) ? \ __static_cpu_has(bit) : \ boot_cpu_has(bit) \ ) -- cgit From bdc802dcca1709b01988d57e91f9f35ce1609fcc Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 7 Jul 2010 17:29:18 -0700 Subject: x86, cpu: Support the features flags in new CPUID leaf 7 Intel has defined CPUID leaf 7 as the next set of feature flags (see the AVX specification, version 007). Add support for this new feature flags word. Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/include/asm/cpufeature.h | 13 +++++++++---- arch/x86/include/asm/required-features.h | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index aeb6f3f9b2ca..3ec9275cea46 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -6,7 +6,7 @@ #include -#define NCAPINTS 9 /* N 32-bit words worth of info */ +#define NCAPINTS 10 /* N 32-bit words worth of info */ /* * Note: If the comment begins with a quoted string, that string is used @@ -159,14 +159,14 @@ /* * Auxiliary flags: Linux defined - For features scattered in various - * CPUID levels like 0x6, 0xA etc + * CPUID levels like 0x6, 0xA etc, word 7 */ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -/* Virtualization flags: Linux defined */ +/* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ #define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ @@ -177,6 +177,9 @@ #define X86_FEATURE_SVML (8*32+7) /* "svm_lock" AMD SVM locking MSR */ #define X86_FEATURE_NRIPS (8*32+8) /* "nrip_save" AMD SVM next_rip save */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ +#define X86_FEATURE_FSGSBASE (9*32+0) /* {RD/WR}{FS/GS}BASE instructions*/ + #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include @@ -197,7 +200,9 @@ extern const char * const x86_power_flags[32]; (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \ (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \ (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ - (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \ + (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \ ? 1 : \ test_cpu_cap(c, bit)) diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 64cf2d24fad1..6c7fc25f2c34 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -84,5 +84,7 @@ #define REQUIRED_MASK5 0 #define REQUIRED_MASK6 0 #define REQUIRED_MASK7 0 +#define REQUIRED_MASK8 0 +#define REQUIRED_MASK9 0 #endif /* _ASM_X86_REQUIRED_FEATURES_H */ -- cgit From f33ebbe9da2c3c24664a0ad4f8fd83f293547e63 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 6 May 2010 20:17:00 +0200 Subject: unistd: add __NR_prlimit64 syscall numbers Add __NR_prlimit64 syscall numbers to asm-generic. Add them also to asm-x86, both 32 and 64-bit. Signed-off-by: Jiri Slaby Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/include/asm/unistd_32.h | 3 ++- arch/x86/include/asm/unistd_64.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index beb9b5f8f8a4..35e0cb151b69 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -343,10 +343,11 @@ #define __NR_rt_tgsigqueueinfo 335 #define __NR_perf_event_open 336 #define __NR_recvmmsg 337 +#define __NR_prlimit64 338 #ifdef __KERNEL__ -#define NR_syscalls 338 +#define NR_syscalls 339 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index ff4307b0e81e..570bf5eae564 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -663,6 +663,8 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_recvmmsg 299 __SYSCALL(__NR_recvmmsg, sys_recvmmsg) +#define __NR_prlimit64 300 +__SYSCALL(__NR_prlimit64, sys_prlimit64) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR -- cgit From 40e1d7a4ffee5cb17f5c36f4c3c4a011ab103ebe Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 19 Jul 2010 16:05:51 -0700 Subject: x86, cpu: Add xsaveopt cpufeature Add cpu feature bit support for the XSAVEOPT instruction. Signed-off-by: Suresh Siddha LKML-Reference: <20100719230205.523204988@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3ec9275cea46..d5ea3e3a8a42 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -165,6 +165,7 @@ #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_XSAVEOPT (7*32+4) /* "xsaveopt" Optimized Xsave */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ -- cgit From 29104e101d710dd152f807978884643a52eca8b7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 19 Jul 2010 16:05:49 -0700 Subject: x86, xsave: Sync xsave memory layout with its header for user handling With xsaveopt, if a processor implementation discern that a processor state component is in its initialized state it may modify the corresponding bit in the xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory layout. Hence wHile presenting the xstate information to the user, we always ensure that the memory layout of a feature will be in the init state if the corresponding header bit is zero. This ensures the consistency and avoids the condition of the user seeing some some stale state in the memory layout during signal handling, debugging etc. Signed-off-by: Suresh Siddha LKML-Reference: <20100719230205.351459480@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 14 ++++++++++++++ arch/x86/include/asm/xsave.h | 10 ++++++++++ 2 files changed, 24 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b904..bb370fd0a1c2 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -58,11 +58,25 @@ extern int restore_i387_xstate_ia32(void __user *buf); #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static __always_inline __pure bool use_xsaveopt(void) +{ + return 0; +} + static __always_inline __pure bool use_xsave(void) { return static_cpu_has(X86_FEATURE_XSAVE); } +extern void __sanitize_i387_state(struct task_struct *); + +static inline void sanitize_i387_state(struct task_struct *tsk) +{ + if (!use_xsaveopt()) + return; + __sanitize_i387_state(tsk); +} + #ifdef CONFIG_X86_64 /* Ignore delayed exceptions from user space */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae228..0c72adc0cb15 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -111,6 +111,16 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) : "memory"); } +static inline void xsave_state(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + static inline void fpu_xsave(struct fpu *fpu) { /* This, however, we can work around by forcing the compiler to select -- cgit From 6bad06b768920e278c7cedfdda56a0b4c6a35ee9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 19 Jul 2010 16:05:52 -0700 Subject: x86, xsave: Use xsaveopt in context-switch path when supported xsaveopt is a more optimized form of xsave specifically designed for the context switch usage. xsaveopt doesn't save the state that's not modified from the prior xrstor. And if a specific feature state gets modified to the init state, then xsaveopt just updates the header bit in the xsave memory layout without updating the corresponding memory layout. Signed-off-by: Suresh Siddha LKML-Reference: <20100719230205.604014179@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 2 +- arch/x86/include/asm/xsave.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index bb370fd0a1c2..59bd93ac7fef 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -60,7 +60,7 @@ extern int restore_i387_xstate_ia32(void __user *buf); static __always_inline __pure bool use_xsaveopt(void) { - return 0; + return static_cpu_has(X86_FEATURE_XSAVEOPT); } static __always_inline __pure bool use_xsave(void) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 0c72adc0cb15..ec86c5fd6a6e 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -125,8 +125,11 @@ static inline void fpu_xsave(struct fpu *fpu) { /* This, however, we can work around by forcing the compiler to select an addressing mode that doesn't require extended registers. */ - __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" - : : "D" (&(fpu->state->xsave)), - "a" (-1), "d"(-1) : "memory"); + alternative_input( + ".byte " REX_PREFIX "0x0f,0xae,0x27", + ".byte " REX_PREFIX "0x0f,0xae,0x37", + X86_FEATURE_XSAVEOPT, + [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : + "memory"); } #endif -- cgit From 278bc5f6abd69dd868746dbd642266ac09a9c9c6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 19 Jul 2010 18:53:51 -0700 Subject: x86, cpu: Clean up formatting in cpufeature.h, remove override Clean up the formatting in cpufeature.h, and remove an unnecessary name override. Signed-off-by: H. Peter Anvin Cc: Suresh Siddha LKML-Reference: --- arch/x86/include/asm/cpufeature.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index d5ea3e3a8a42..4be50ddd4d79 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -165,7 +165,7 @@ #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_XSAVEOPT (7*32+4) /* "xsaveopt" Optimized Xsave */ +#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ @@ -173,13 +173,13 @@ #define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ #define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ #define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ -#define X86_FEATURE_NPT (8*32+5) /* AMD Nested Page Table support */ -#define X86_FEATURE_LBRV (8*32+6) /* AMD LBR Virtualization support */ -#define X86_FEATURE_SVML (8*32+7) /* "svm_lock" AMD SVM locking MSR */ -#define X86_FEATURE_NRIPS (8*32+8) /* "nrip_save" AMD SVM next_rip save */ +#define X86_FEATURE_NPT (8*32+ 5) /* AMD Nested Page Table support */ +#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ +#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ +#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ -#define X86_FEATURE_FSGSBASE (9*32+0) /* {RD/WR}{FS/GS}BASE instructions*/ +#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) -- cgit From 5f755293ca61520b70b11afe1b1d6e1635cb6c00 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Jul 2010 15:19:48 -0700 Subject: x86, gcc-4.6: Avoid unused by set variables in rdmsr Avoids quite a lot of warnings with a gcc 4.6 -Wall build because this happens in a commonly used header file (apic.h) Signed-off-by: Andi Kleen LKML-Reference: <201007202219.o6KMJme6021066@imap1.linux-foundation.org> Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c5bc4c2d33f5..084ef95274cd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -148,8 +148,8 @@ static inline unsigned long long native_read_pmc(int counter) #define rdmsr(msr, val1, val2) \ do { \ u64 __val = native_read_msr((msr)); \ - (val1) = (u32)__val; \ - (val2) = (u32)(__val >> 32); \ + (void)((val1) = (u32)__val); \ + (void)((val2) = (u32)(__val >> 32)); \ } while (0) static inline void wrmsr(unsigned msr, unsigned low, unsigned high) -- cgit From 7aa2b5f8ec60505160df1c25398e8286c8432689 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 20 Jul 2010 20:50:48 +0200 Subject: x86, xsave: Do not include asm/i387.h in asm/xsave.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no dependencies to asm/i387.h. Instead, if including only xsave.h the following error occurs: .../arch/x86/include/asm/i387.h:110: error: ‘XSTATE_FP’ undeclared (first use in this function) .../arch/x86/include/asm/i387.h:110: error: (Each undeclared identifier is reported only once .../arch/x86/include/asm/i387.h:110: error: for each function it appears in.) This patch fixes this. Signed-off-by: Robert Richter LKML-Reference: <1279651857-24639-2-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index ec86c5fd6a6e..94d5f84d89f2 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -3,7 +3,6 @@ #include #include -#include #define XSTATE_FP 0x1 #define XSTATE_SSE 0x2 -- cgit From a751bd858b16dce57f3b6b85ba07946df1bd7be4 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 20 Jul 2010 15:19:45 -0700 Subject: x86, rwsem: Stay on fast path when count > 0 in __up_write() When count > 0 there is no need to take the call_rwsem_wake path. If we did take that path, it would just return without doing anything due to the active count not being zero. Signed-off-by: Michel Lespinasse LKML-Reference: <201007202219.o6KMJj9x021042@imap1.linux-foundation.org> Acked-by: David Howells Cc: Mike Waychison Cc: Suleiman Souhlal Cc: Ying Han Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/rwsem.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 606ede126972..5bf5e04e497f 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -216,9 +216,8 @@ static inline void __up_write(struct rw_semaphore *sem) rwsem_count_t tmp; asm volatile("# beginning __up_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" - /* tries to transition - 0xffff0001 -> 0x00000000 */ - " jz 1f\n" + /* subtracts 0xffff0001, returns the old value */ + " jns 1f\n\t" " call call_rwsem_wake\n" "1:\n\t" "# ending __up_write\n" -- cgit From b4bcb4c28c64cc2876b4aef218d992ce806194da Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 20 Jul 2010 15:19:45 -0700 Subject: x86, rwsem: Minor cleanups Clarified few comments and made initialization of %edx/%rdx more uniform accross __down_write_nested, __up_read and __up_write functions. Signed-off-by: Michel Lespinasse LKML-Reference: <201007202219.o6KMJkiA021048@imap1.linux-foundation.org> Acked-by: David Howells Cc: Mike Waychison Cc: Suleiman Souhlal Cc: Ying Han Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/rwsem.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 5bf5e04e497f..d1e41b0f9b60 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -118,7 +118,7 @@ static inline void __down_read(struct rw_semaphore *sem) { asm volatile("# beginning down_read\n\t" LOCK_PREFIX _ASM_INC "(%1)\n\t" - /* adds 0x00000001, returns the old value */ + /* adds 0x00000001 */ " jns 1f\n" " call call_rwsem_down_read_failed\n" "1:\n\t" @@ -156,11 +156,9 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { rwsem_count_t tmp; - - tmp = RWSEM_ACTIVE_WRITE_BIAS; asm volatile("# beginning down_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" - /* subtract 0x0000ffff, returns the old value */ + /* adds 0xffff0001, returns the old value */ " test %1,%1\n\t" /* was the count 0 before? */ " jz 1f\n" @@ -168,7 +166,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) "1:\n" "# ending down_write" : "+m" (sem->count), "=d" (tmp) - : "a" (sem), "1" (tmp) + : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); } @@ -195,16 +193,16 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) */ static inline void __up_read(struct rw_semaphore *sem) { - rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; + rwsem_count_t tmp; asm volatile("# beginning __up_read\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtracts 1, returns the old value */ " jns 1f\n\t" - " call call_rwsem_wake\n" + " call call_rwsem_wake\n" /* expects old value in %edx */ "1:\n" "# ending __up_read\n" : "+m" (sem->count), "=d" (tmp) - : "a" (sem), "1" (tmp) + : "a" (sem), "1" (-RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); } @@ -218,7 +216,7 @@ static inline void __up_write(struct rw_semaphore *sem) LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtracts 0xffff0001, returns the old value */ " jns 1f\n\t" - " call call_rwsem_wake\n" + " call call_rwsem_wake\n" /* expects old value in %edx */ "1:\n\t" "# ending __up_write\n" : "+m" (sem->count), "=d" (tmp) -- cgit From 5edd19af18a36a4e22c570b1b969179e0ca1fe4c Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Tue, 20 Jul 2010 18:09:05 -0500 Subject: x86, UV: Make kdump avoid stack dumps UV NMI callback's should not write stack dumps when a kdump is to be written. When invoking the crash kernel to write a dump, kdump_nmi_shootdown_cpus() uses NMI's to get all the cpu's to save their register context and halt. But the NMI interrupt handler runs a callback list. This patch sets a flag to prevent any of those callbacks from interfering with the halt of the cpu. For UV, which currently has the only callback to which this is relevant, the uv_handle_nmi() callback should not do dumping of stacks. The 'in_crash_kexec' flag is defined as an extern in kdebug.h firstly because x2apic_uv_x.c includes it. Secondly because some future callback might need the flag to know that it should not enter the debugger. (Such a scenario was in fact present in the 2.6.32 kernel, SuSE distribution, where a call to kdb needed to be avoided.) Signed-off-by: Cliff Wickman LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/kdebug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index fa7c0b974761..7a2910b3512b 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,5 +33,6 @@ extern void __show_regs(struct pt_regs *regs, int all); extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); +extern int in_crash_kexec; #endif /* _ASM_X86_KDEBUG_H */ -- cgit From 0e49bf66d2ca649b167428adddbbbe9d9bd4894c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:52 +0200 Subject: x86, xsave: Separate fpu and xsave initialization As xsave also supports other than fpu features, it should be initialized independently of the fpu. This patch moves this out of fpu initialization. There is also a lot of cross referencing between fpu and xsave code. This patch reduces this by making xsave_cntxt_init() and init_thread_xstate() static functions. The patch moves the cpu_has_xsave check at the beginning of xsave_init(). All other checks may removed then. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-2-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 1 - arch/x86/include/asm/xsave.h | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 59bd93ac7fef..509ddabeae25 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -31,7 +31,6 @@ extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); extern void __math_state_restore(void); -extern void init_thread_xstate(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern user_regset_active_fn fpregs_active, xfpregs_active; diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 94d5f84d89f2..4d3b5d1fc028 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -28,7 +28,6 @@ extern u64 pcntxt_mask; extern struct xsave_struct *init_xstate_buf; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern void xsave_cntxt_init(void); extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); -- cgit From ee813d53a8e980a3a28318efb8935d45723f5211 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:54 +0200 Subject: x86, xsave: Check cpuid level for XSTATE_CPUID (0x0d) The patch introduces the XSTATE_CPUID macro and adds a check that tests if XSTATE_CPUID exists. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-4-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 4d3b5d1fc028..d1b5f3a2fa20 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -4,6 +4,8 @@ #include #include +#define XSTATE_CPUID 0x0000000d + #define XSTATE_FP 0x1 #define XSTATE_SSE 0x2 #define XSTATE_YMM 0x4 -- cgit From 45c2d7f46211a0b1f6b425c59575c53145afc4b4 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:55 +0200 Subject: x86, xsave: Make init_xstate_buf static The pointer is only used in xsave.c. Making it static. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-5-git-send-email-robert.richter@amd.com> Acked-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xsave.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index d1b5f3a2fa20..0ae6b9961985 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -27,7 +27,6 @@ extern unsigned int xstate_size; extern u64 pcntxt_mask; -extern struct xsave_struct *init_xstate_buf; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void xsave_init(void); -- cgit From 8c06585d6431addadd94903843dfbcd315b42d4e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 17 Jul 2010 09:03:26 -0400 Subject: x86: Remove redundant K6 MSRs MSR_K6_EFER is unused, and MSR_K6_STAR is redundant with MSR_STAR. Signed-off-by: Brian Gerst LKML-Reference: <1279371808-24804-1-git-send-email-brgerst@gmail.com> Reviewed-by: Pekka Enberg Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr-index.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8c7ae4318629..6068e0e06e00 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -159,8 +159,6 @@ #define MSR_K7_FID_VID_STATUS 0xc0010042 /* K6 MSRs */ -#define MSR_K6_EFER 0xc0000080 -#define MSR_K6_STAR 0xc0000081 #define MSR_K6_WHCR 0xc0000082 #define MSR_K6_UWCCR 0xc0000085 #define MSR_K6_EPMR 0xc0000086 -- cgit From 18f19aa62a267f2f759e278018f1032adf4c3774 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 14 May 2010 12:38:24 +0100 Subject: xen: Add support for HVM hypercalls. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Sheng Yang Signed-off-by: Stefano Stabellini --- arch/x86/include/asm/xen/hypercall.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9c371e4a9fa6..7fda040a76cd 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) return _hypercall2(int, nmi_op, op, arg); } +static inline unsigned long __must_check +HYPERVISOR_hvm_op(int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { -- cgit From bee6ab53e652a414af20392899879b58cd80d033 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 14 May 2010 12:39:33 +0100 Subject: x86: early PV on HVM features initialization. Initialize basic pv on hvm features adding a new Xen HVM specific hypervisor_x86 structure. Don't try to initialize xen-kbdfront and xen-fbfront when running on HVM because the backends are not available. Signed-off-by: Stefano Stabellini Signed-off-by: Sheng Yang Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/hypervisor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 70abda7058c8..ff2546ce7178 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -45,5 +45,6 @@ extern const struct hypervisor_x86 *x86_hyper; /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_hvm; #endif -- cgit From 38e20b07efd541a959de367dc90a17f92ce2e8a6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 14 May 2010 12:40:51 +0100 Subject: x86/xen: event channels delivery on HVM. Set the callback to receive evtchns from Xen, using the callback vector delivery mechanism. The traditional way for receiving event channel notifications from Xen is via the interrupts from the platform PCI device. The callback vector is a newer alternative that allow us to receive notifications on any vcpu and doesn't need any PCI support: we allocate a vector exclusively to receive events, in the vector handler we don't need to interact with the vlapic, therefore we avoid a VMEXIT. Signed-off-by: Stefano Stabellini Signed-off-by: Sheng Yang Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/irq_vectors.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 8767d99c4f64..e2ca30092557 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -125,6 +125,9 @@ */ #define MCE_SELF_VECTOR 0xeb +/* Xen vector callback to receive events in a HVM domain */ +#define XEN_HVM_EVTCHN_CALLBACK 0xe9 + #define NR_VECTORS 256 #define FPU_IRQ 13 -- cgit From bbbe57386e857eb2a8d4abcae71063c819c06ff1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 9 Feb 2010 14:30:55 -0500 Subject: pci-swiotlb-xen: Add glue code to setup dma_ops utilizing xen_swiotlb_* functions. We add the glue code that sets up a dma_ops structure with the xen_swiotlb_* functions. The code turns on xen_swiotlb flag when it detects it is running under Xen and it is either in privileged mode or the iommu=soft flag was passed in. It also disables the bare-metal SWIOTLB if the Xen-SWIOTLB has been enabled. Note: The Xen-SWIOTLB is only built when CONFIG_XEN is enabled. Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Jeremy Fitzhardinge Cc: FUJITA Tomonori Cc: Albert Herranz Cc: Ian Campbell --- arch/x86/include/asm/xen/swiotlb-xen.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 arch/x86/include/asm/xen/swiotlb-xen.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h new file mode 100644 index 000000000000..1be1ab7d6a41 --- /dev/null +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -0,0 +1,14 @@ +#ifndef _ASM_X86_SWIOTLB_XEN_H +#define _ASM_X86_SWIOTLB_XEN_H + +#ifdef CONFIG_SWIOTLB_XEN +extern int xen_swiotlb; +extern int __init pci_xen_swiotlb_detect(void); +extern void __init pci_xen_swiotlb_init(void); +#else +#define xen_swiotlb (0) +static inline int __init pci_xen_swiotlb_detect(void) { return 0; } +static inline void __init pci_xen_swiotlb_init(void) { } +#endif + +#endif /* _ASM_X86_SWIOTLB_XEN_H */ -- cgit From 113fc5a6e8c2288619ff7e8187a6f556b7e0d372 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 27 Jul 2010 17:01:49 -0700 Subject: x86: Add memory modify constraints to xchg() and cmpxchg() xchg() and cmpxchg() modify their memory operands, not merely read them. For some versions of gcc the "memory" clobber has apparently dealt with the situation, but not for all. Originally-by: Linus Torvalds Signed-off-by: H. Peter Anvin Cc: Glauber Costa Cc: Avi Kivity Cc: Peter Palfrader Cc: Greg KH Cc: Alan Cox Cc: Zachary Amsden Cc: Marcelo Tosatti Cc: LKML-Reference: <4C4F7277.8050306@zytor.com> --- arch/x86/include/asm/cmpxchg_32.h | 68 +++++++++++++++++++-------------------- arch/x86/include/asm/cmpxchg_64.h | 40 +++++++++++------------ 2 files changed, 54 insertions(+), 54 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 8859e12dd3cf..c1cf59d72f09 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -27,20 +27,20 @@ struct __xchg_dummy { switch (size) { \ case 1: \ asm volatile("xchgb %b0,%1" \ - : "=q" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=q" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 2: \ asm volatile("xchgw %w0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 4: \ asm volatile("xchgl %0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ default: \ @@ -70,14 +70,14 @@ static inline void __set_64bit(unsigned long long *ptr, unsigned int low, unsigned int high) { asm volatile("\n1:\t" - "movl (%0), %%eax\n\t" - "movl 4(%0), %%edx\n\t" - LOCK_PREFIX "cmpxchg8b (%0)\n\t" + "movl (%1), %%eax\n\t" + "movl 4(%1), %%edx\n\t" + LOCK_PREFIX "cmpxchg8b (%1)\n\t" "jnz 1b" - : /* no outputs */ - : "D"(ptr), - "b"(low), - "c"(high) + : "=m" (*ptr) + : "D" (ptr), + "b" (low), + "c" (high) : "ax", "dx", "memory"); } @@ -121,21 +121,21 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b1,%2" \ - : "=a"(__ret) \ - : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgb %b2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "q" (__new), "0" (__old) \ : "memory"); \ break; \ case 2: \ - asm volatile(lock "cmpxchgw %w1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgw %w2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ case 4: \ - asm volatile(lock "cmpxchgl %1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ default: \ @@ -180,12 +180,12 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long new) { unsigned long long prev; - asm volatile(LOCK_PREFIX "cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), - "m"(*__xg(ptr)), - "0"(old) + asm volatile(LOCK_PREFIX "cmpxchg8b %1" + : "=A" (prev), + "+m" (*__xg(ptr)) + : "b" ((unsigned long)new), + "c" ((unsigned long)(new >> 32)), + "0" (old) : "memory"); return prev; } @@ -195,12 +195,12 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr, unsigned long long new) { unsigned long long prev; - asm volatile("cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), - "m"(*__xg(ptr)), - "0"(old) + asm volatile("cmpxchg8b %1" + : "=A" (prev), + "+m" (*__xg(ptr)) + : "b" ((unsigned long)new), + "c" ((unsigned long)(new >> 32)), + "0" (old) : "memory"); return prev; } diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 485ae415faec..b92f147339f3 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -26,26 +26,26 @@ extern void __cmpxchg_wrong_size(void); switch (size) { \ case 1: \ asm volatile("xchgb %b0,%1" \ - : "=q" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=q" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 2: \ asm volatile("xchgw %w0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 4: \ asm volatile("xchgl %k0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ case 8: \ asm volatile("xchgq %0,%1" \ - : "=r" (__x) \ - : "m" (*__xg(ptr)), "0" (__x) \ + : "=r" (__x), "+m" (*__xg(ptr)) \ + : "0" (__x) \ : "memory"); \ break; \ default: \ @@ -71,27 +71,27 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b1,%2" \ - : "=a"(__ret) \ - : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgb %b2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "q" (__new), "0" (__old) \ : "memory"); \ break; \ case 2: \ - asm volatile(lock "cmpxchgw %w1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgw %w2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ case 4: \ - asm volatile(lock "cmpxchgl %k1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgl %k2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ case 8: \ - asm volatile(lock "cmpxchgq %1,%2" \ - : "=a"(__ret) \ - : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "r" (__new), "0" (__old) \ : "memory"); \ break; \ default: \ -- cgit From c7f52cdc2f3e1733d3864e439ac2e92edd99ef31 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Jul 2010 22:58:01 -0700 Subject: support multiple .discard.* sections to avoid section type conflicts gcc 4.4.4 will complain if you use a .discard section for both text and data ("causes a section type conflict"). Add support for ".discard.*" sections, and use .discard.text for a dummy function in the x86 RESERVE_BRK() macro. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 86b1506f4179..ef292c792d74 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -82,7 +82,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard) __used \ + static void __section(.discard.text) __used \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ -- cgit From 69309a05907546fb686b251d4ab041c26afe1e1d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 27 Jul 2010 23:29:52 -0700 Subject: x86, asm: Clean up and simplify set_64bit() Clean up and simplify set_64bit(). This code is quite old (1.3.11) and contains a fair bit of auxilliary machinery that current versions of gcc handle just fine automatically. Worse, the auxilliary machinery can actually cause an unnecessary spill to memory. Furthermore, the loading of the old value inside the loop in the 32-bit case is unnecessary: if the value doesn't match, the CMPXCHG8B instruction will already have loaded the "new previous" value for us. Clean up the comment, too, and remove page references to obsolete versions of the Intel SDM. Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/include/asm/cmpxchg_32.h | 67 ++++++++++++--------------------------- arch/x86/include/asm/cmpxchg_64.h | 4 +-- 2 files changed, 21 insertions(+), 50 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index c1cf59d72f09..20955ea7bc12 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -53,60 +53,33 @@ struct __xchg_dummy { __xchg((v), (ptr), sizeof(*ptr)) /* - * The semantics of XCHGCMP8B are a bit strange, this is why - * there is a loop and the loading of %%eax and %%edx has to - * be inside. This inlines well in most cases, the cached - * cost is around ~38 cycles. (in the future we might want - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that - * might have an implicit FPU-save as a cost, so it's not - * clear which path to go.) + * CMPXCHG8B only writes to the target if we had the previous + * value in registers, otherwise it acts as a read and gives us the + * "new previous" value. That is why there is a loop. Preloading + * EDX:EAX is a performance optimization: in the common case it means + * we need only one locked operation. * - * cmpxchg8b must be used with the lock prefix here to allow - * the instruction to be executed atomically, see page 3-102 - * of the instruction set reference 24319102.pdf. We need - * the reader side to see the coherent 64bit value. + * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very + * least an FPU save and/or %cr0.ts manipulation. + * + * cmpxchg8b must be used with the lock prefix here to allow the + * instruction to be executed atomically. We need to have the reader + * side to see the coherent 64bit value. */ -static inline void __set_64bit(unsigned long long *ptr, - unsigned int low, unsigned int high) +static inline void set_64bit(volatile u64 *ptr, u64 value) { + u32 low = value; + u32 high = value >> 32; + u64 prev = *ptr; + asm volatile("\n1:\t" - "movl (%1), %%eax\n\t" - "movl 4(%1), %%edx\n\t" - LOCK_PREFIX "cmpxchg8b (%1)\n\t" + LOCK_PREFIX "cmpxchg8b %0\n\t" "jnz 1b" - : "=m" (*ptr) - : "D" (ptr), - "b" (low), - "c" (high) - : "ax", "dx", "memory"); -} - -static inline void __set_64bit_constant(unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32)); -} - -#define ll_low(x) *(((unsigned int *)&(x)) + 0) -#define ll_high(x) *(((unsigned int *)&(x)) + 1) - -static inline void __set_64bit_var(unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr, ll_low(value), ll_high(value)); + : "=m" (*ptr), "+A" (prev) + : "b" (low), "c" (high) + : "memory"); } -#define set_64bit(ptr, value) \ - (__builtin_constant_p((value)) \ - ? __set_64bit_constant((ptr), (value)) \ - : __set_64bit_var((ptr), (value))) - -#define _set_64bit(ptr, value) \ - (__builtin_constant_p(value) \ - ? __set_64bit(ptr, (unsigned int)(value), \ - (unsigned int)((value) >> 32)) \ - : __set_64bit(ptr, ll_low((value)), ll_high((value)))) - extern void __cmpxchg_wrong_size(void); /* diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index b92f147339f3..9596e7c61960 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -5,13 +5,11 @@ #define __xg(x) ((volatile long *)(x)) -static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) +static inline void set_64bit(volatile u64 *ptr, u64 val) { *ptr = val; } -#define _set_64bit set_64bit - extern void __xchg_wrong_size(void); extern void __cmpxchg_wrong_size(void); -- cgit From 11637e4b7dc098e9a863f0a619d55ebc60f5949e Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:25 -0500 Subject: fanotify: fanotify_init syscall declaration This patch defines a new syscall fanotify_init() of the form: int sys_fanotify_init(unsigned int flags, unsigned int event_f_flags, unsigned int priority) This syscall is used to create and fanotify group. This is very similar to the inotify_init() syscall. Signed-off-by: Eric Paris --- arch/x86/include/asm/unistd_32.h | 3 ++- arch/x86/include/asm/unistd_64.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index beb9b5f8f8a4..981c7e7ad804 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -343,10 +343,11 @@ #define __NR_rt_tgsigqueueinfo 335 #define __NR_perf_event_open 336 #define __NR_recvmmsg 337 +#define __NR_fanotify_init 338 #ifdef __KERNEL__ -#define NR_syscalls 338 +#define NR_syscalls 339 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index ff4307b0e81e..4f23e04bdb34 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -663,6 +663,8 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_recvmmsg 299 __SYSCALL(__NR_recvmmsg, sys_recvmmsg) +#define __NR_fanotify_init 300 +__SYSCALL(__NR_fanotify_init, sys_fanotify_init) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR -- cgit From bbaa4168b2d2d8cc674e6d35806e8426aef464b8 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 17 Dec 2009 21:24:26 -0500 Subject: fanotify: sys_fanotify_mark declartion This patch simply declares the new sys_fanotify_mark syscall int fanotify_mark(int fanotify_fd, unsigned int flags, u64_mask, int dfd const char *pathname) Signed-off-by: Eric Paris --- arch/x86/include/asm/sys_ia32.h | 3 +++ arch/x86/include/asm/unistd_32.h | 3 ++- arch/x86/include/asm/unistd_64.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3ad421784ae7..cf4e2e381cba 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -80,4 +80,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *); /* ia32/ipc32.c */ asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); + +asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int, + const char __user *); #endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 981c7e7ad804..80b799cd74f7 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -344,10 +344,11 @@ #define __NR_perf_event_open 336 #define __NR_recvmmsg 337 #define __NR_fanotify_init 338 +#define __NR_fanotify_mark 339 #ifdef __KERNEL__ -#define NR_syscalls 339 +#define NR_syscalls 340 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 4f23e04bdb34..5b7b1d585616 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -665,6 +665,8 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) __SYSCALL(__NR_recvmmsg, sys_recvmmsg) #define __NR_fanotify_init 300 __SYSCALL(__NR_fanotify_init, sys_fanotify_init) +#define __NR_fanotify_mark 301 +__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR -- cgit From d78d671db478eb8b14c78501c0cee1cc7baf6967 Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Wed, 28 Jul 2010 19:09:30 +0200 Subject: x86, cpu: AMD errata checking framework Errata are defined using the AMD_LEGACY_ERRATUM() or AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that have an OSVW id assigned, which it takes as first argument. Both take a variable number of family-specific model-stepping ranges created by AMD_MODEL_RANGE(). Iff an erratum has an OSVW id, OSVW is available on the CPU, and the OSVW id is known to the hardware, it is used to determine whether an erratum is present. Otherwise, the model-stepping ranges are matched against the current CPU to find out whether the erratum applies. For certain special errata, the code using this framework might have to conduct further checks to make sure an erratum is really (not) present. Signed-off-by: Hans Rosenfeld LKML-Reference: <1280336972-865982-1-git-send-email-hans.rosenfeld@amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7e5c6a60b8ee..5084c2f5ac20 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1025,4 +1025,22 @@ unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, return ratio; } +/* + * AMD errata checking + */ +#ifdef CONFIG_CPU_SUP_AMD +extern bool cpu_has_amd_erratum(const int *); + +#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 } +#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 } +#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ + ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) +#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) +#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) +#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) + +#else +#define cpu_has_amd_erratum(x) (false) +#endif /* CONFIG_CPU_SUP_AMD */ + #endif /* _ASM_X86_PROCESSOR_H */ -- cgit From 9d8888c2a214aece2494a49e699a097c2ba9498b Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Wed, 28 Jul 2010 19:09:31 +0200 Subject: x86, cpu: Clean up AMD erratum 400 workaround Remove check_c1e_idle() and use the new AMD errata checking framework instead. Signed-off-by: Hans Rosenfeld LKML-Reference: <1280336972-865982-2-git-send-email-hans.rosenfeld@amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5084c2f5ac20..eebdc1fde3d1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1029,6 +1029,7 @@ unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, * AMD errata checking */ #ifdef CONFIG_CPU_SUP_AMD +extern const int amd_erratum_400[]; extern bool cpu_has_amd_erratum(const int *); #define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 } -- cgit From 1be85a6d93f4207d8c2c6238c4a96895e28cefba Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Wed, 28 Jul 2010 19:09:32 +0200 Subject: x86, cpu: Use AMD errata checking framework for erratum 383 Use the AMD errata checking framework instead of open-coding the test. Signed-off-by: Hans Rosenfeld LKML-Reference: <1280336972-865982-3-git-send-email-hans.rosenfeld@amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index eebdc1fde3d1..d85637bb9505 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1029,6 +1029,7 @@ unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, * AMD errata checking */ #ifdef CONFIG_CPU_SUP_AMD +extern const int amd_erratum_383[]; extern const int amd_erratum_400[]; extern bool cpu_has_amd_erratum(const int *); -- cgit From 4532b305e8f0c238dd73048068ff8a6dd1380291 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 28 Jul 2010 15:18:35 -0700 Subject: x86, asm: Clean up and simplify Remove the __xg() hack to create a memory barrier near xchg and cmpxchg; it has been there since 1.3.11 but should not be necessary with "asm volatile" and a "memory" clobber, neither of which were there in the original implementation. However, we *should* make this a volatile reference. Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/include/asm/cmpxchg_32.h | 75 ++++++++++++++++++++++----------------- arch/x86/include/asm/cmpxchg_64.h | 61 +++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 52 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 20955ea7bc12..f5bd1fd388ff 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -11,38 +11,42 @@ extern void __xchg_wrong_size(void); /* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around. */ - -struct __xchg_dummy { - unsigned long a[100]; -}; -#define __xg(x) ((struct __xchg_dummy *)(x)) - #define __xchg(x, ptr, size) \ ({ \ __typeof(*(ptr)) __x = (x); \ switch (size) { \ case 1: \ - asm volatile("xchgb %b0,%1" \ - : "=q" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile("xchgb %0,%1" \ + : "=q" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile("xchgw %w0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile("xchgw %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 4: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ asm volatile("xchgl %0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ default: \ __xchg_wrong_size(); \ } \ @@ -94,23 +98,32 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile(lock "cmpxchgw %w2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 4: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ asm volatile(lock "cmpxchgl %2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ default: \ __cmpxchg_wrong_size(); \ } \ @@ -148,31 +161,27 @@ extern void __cmpxchg_wrong_size(void); (unsigned long long)(n))) #endif -static inline unsigned long long __cmpxchg64(volatile void *ptr, - unsigned long long old, - unsigned long long new) +static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) { - unsigned long long prev; + u64 prev; asm volatile(LOCK_PREFIX "cmpxchg8b %1" : "=A" (prev), - "+m" (*__xg(ptr)) - : "b" ((unsigned long)new), - "c" ((unsigned long)(new >> 32)), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), "0" (old) : "memory"); return prev; } -static inline unsigned long long __cmpxchg64_local(volatile void *ptr, - unsigned long long old, - unsigned long long new) +static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) { - unsigned long long prev; + u64 prev; asm volatile("cmpxchg8b %1" : "=A" (prev), - "+m" (*__xg(ptr)) - : "b" ((unsigned long)new), - "c" ((unsigned long)(new >> 32)), + "+m" (*ptr) + : "b" ((u32)new), + "c" ((u32)(new >> 32)), "0" (old) : "memory"); return prev; diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 9596e7c61960..423ae58aa020 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,8 +3,6 @@ #include /* Provides LOCK_PREFIX */ -#define __xg(x) ((volatile long *)(x)) - static inline void set_64bit(volatile u64 *ptr, u64 val) { *ptr = val; @@ -14,38 +12,51 @@ extern void __xchg_wrong_size(void); extern void __cmpxchg_wrong_size(void); /* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around. */ #define __xchg(x, ptr, size) \ ({ \ __typeof(*(ptr)) __x = (x); \ switch (size) { \ case 1: \ - asm volatile("xchgb %b0,%1" \ - : "=q" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile("xchgb %0,%1" \ + : "=q" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile("xchgw %w0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile("xchgw %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 4: \ - asm volatile("xchgl %k0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ case 8: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ asm volatile("xchgq %0,%1" \ - : "=r" (__x), "+m" (*__xg(ptr)) \ + : "=r" (__x), "+m" (*__ptr) \ : "0" (__x) \ : "memory"); \ break; \ + } \ default: \ __xchg_wrong_size(); \ } \ @@ -69,29 +80,41 @@ extern void __cmpxchg_wrong_size(void); __typeof__(*(ptr)) __new = (new); \ switch (size) { \ case 1: \ - asm volatile(lock "cmpxchgb %b2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 2: \ - asm volatile(lock "cmpxchgw %w2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 4: \ - asm volatile(lock "cmpxchgl %k2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ case 8: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ asm volatile(lock "cmpxchgq %2,%1" \ - : "=a" (__ret), "+m" (*__xg(ptr)) \ + : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ break; \ + } \ default: \ __cmpxchg_wrong_size(); \ } \ -- cgit From a378d9338e8dde78314b3a6ae003de351936c729 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 28 Jul 2010 17:05:11 -0700 Subject: x86, asm: Merge cmpxchg_486_u64() and cmpxchg8b_emu() We have two functions for doing exactly the same thing -- emulating cmpxchg8b on 486 and older hardware -- with different calling conventions, and yet doing the same thing. Drop the C version and use the assembly version, via alternatives, for both the local and non-local versions of cmpxchg8b. Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/include/asm/cmpxchg_32.h | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index f5bd1fd388ff..284a6e8f7ce1 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -246,8 +246,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, * to simulate the cmpxchg8b on the 80386 and 80486 CPU. */ -extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); - #define cmpxchg64(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ @@ -265,20 +263,20 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); __ret; }) - -#define cmpxchg64_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - if (likely(boot_cpu_data.x86 > 4)) \ - __ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n)); \ - else \ - __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n)); \ - __ret; \ -}) +#define cmpxchg64_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (o); \ + __typeof__(*(ptr)) __new = (n); \ + alternative_io("call cmpxchg8b_emu", \ + "cmpxchg8b (%%esi)" , \ + X86_FEATURE_CX8, \ + "=A" (__ret), \ + "S" ((ptr)), "0" (__old), \ + "b" ((unsigned int)__new), \ + "c" ((unsigned int)(__new>>32)) \ + : "memory"); \ + __ret; }) #endif -- cgit From 7bd1c365fd124624191d49dcc1eb9759d6017ec3 Mon Sep 17 00:00:00 2001 From: Mike Habeck Date: Wed, 12 May 2010 11:14:32 -0700 Subject: x86/PCI: Add option to not assign BAR's if not already assigned The Linux kernel assigns BARs that a BIOS did not assign, most likely to handle broken BIOSes that didn't enumerate the devices correctly. On UV the BIOS purposely doesn't assign I/O BARs for certain devices/ drivers we know don't use them (examples, LSI SAS, Qlogic FC, ...). We purposely don't assign these I/O BARs because I/O Space is a very limited resource. There is only 64k of I/O Space, and in a PCIe topology that space gets divided up into 4k chucks (this is due to the fact that a pci-to-pci bridge's I/O decoder is aligned at 4k)... Thus a system can have at most 16 cards with I/O BARs: (64k / 4k = 16) SGI needs to scale to >16 devices with I/O BARs. So by not assigning I/O BARs on devices we know don't use them, we can do that (iff the kernel doesn't go and assign these BARs that the BIOS purposely didn't assign). This patch will not assign a resource to a device BAR if that BAR was not assigned by the BIOS, and the kernel cmdline option 'pci=nobar' was specified. This patch is closely modeled after the 'pci=norom' option that currently exists in the tree. Signed-off-by: Mike Habeck Signed-off-by: Mike Travis Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index cd2a31dc5fb8..49c7219826f9 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -30,6 +30,7 @@ #define PCI_HAS_IO_ECS 0x40000 #define PCI_NOASSIGN_ROMS 0x80000 #define PCI_ROOT_NO_CRS 0x100000 +#define PCI_NOASSIGN_BARS 0x200000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; -- cgit From 9792db6174d9927700ed288e6d74b9391bf785d1 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 29 Jul 2010 17:13:42 -0700 Subject: x86, cpu: Package Level Thermal Control, Power Limit Notification definitions Add package level thermal and power limit feature support. The two MSRs and features are new starting with Intel's Sandy Bridge processor. Please check Intel 64 and IA-32 Architectures SDMV Vol 3A 14.5.6 Power Limit Notification and 14.6 Package Level Thermal Management. This patch also fixes a bug which defines reverse THERM_INT_LOW_ENABLE bit and THERM_INT_HIGH_ENABLE bit. [ hpa: fixed up against current tip:x86/cpu ] Signed-off-by: Fenghua Yu LKML-Reference: <1280448826-12004-2-git-send-email-fenghua.yu@intel.com> Reviewed-by: Len Brown Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cpufeature.h | 2 ++ arch/x86/include/asm/msr-index.h | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4be50ddd4d79..817aa316b180 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -166,6 +166,8 @@ #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ +#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 7cc4a026331c..4ea2a7ca7a4b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -224,12 +224,14 @@ #define MSR_IA32_THERM_CONTROL 0x0000019a #define MSR_IA32_THERM_INTERRUPT 0x0000019b -#define THERM_INT_LOW_ENABLE (1 << 0) -#define THERM_INT_HIGH_ENABLE (1 << 1) +#define THERM_INT_HIGH_ENABLE (1 << 0) +#define THERM_INT_LOW_ENABLE (1 << 1) +#define THERM_INT_PLN_ENABLE (1 << 24) #define MSR_IA32_THERM_STATUS 0x0000019c #define THERM_STATUS_PROCHOT (1 << 0) +#define THERM_STATUS_POWER_LIMIT (1 << 10) #define MSR_THERM2_CTL 0x0000019d @@ -241,6 +243,17 @@ #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 + +#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) +#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 + +#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) +#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) +#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) + /* MISC_ENABLE bits: architectural */ #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) -- cgit From 54e5bc020ce1c959eaa7be18cedb734b6b13745e Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 28 Jun 2010 22:00:29 -0400 Subject: x86, olpc: Constify an olpc_ofw() arg The arguments passed to OFW shouldn't be modified; update the 'args' argument of olpc_ofw to reflect this. This saves us some later casting away of consts. Signed-off-by: Andres Salomon LKML-Reference: <20100628220029.1555ac24@debian> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/olpc_ofw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index 3e63d857c48f..08fde475cb3b 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h @@ -12,7 +12,7 @@ #define olpc_ofw(name, args, res) \ __olpc_ofw((name), ARRAY_SIZE(args), args, ARRAY_SIZE(res), res) -extern int __olpc_ofw(const char *name, int nr_args, void **args, int nr_res, +extern int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, void **res); /* determine whether OFW is available and lives in the proper memory */ -- cgit From 9de41573675cbace09b02ef386f3e9c8739d495c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:22 +0300 Subject: KVM: x86 emulator: introduce read cache Introduce read cache which is needed for instruction that require more then one exit to userspace. After returning from userspace the instruction will be re-executed with cached read value. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0b2729bf2070..288cbedcab1c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -186,6 +186,7 @@ struct decode_cache { unsigned long modrm_val; struct fetch_cache fetch; struct read_cache io_read; + struct read_cache mem_read; }; struct x86_emulate_ctxt { -- cgit From 414e6277fd148f6470261cef50a7fed0d88a2825 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:26 +0300 Subject: KVM: x86 emulator: handle "far address" source operand ljmp/lcall instruction operand contains address and segment. It can be 10 bytes long. Currently we decode it as two different operands. Fix it by introducing new kind of operand that can hold entire far address. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 288cbedcab1c..69a64a6a36f4 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -143,7 +143,11 @@ struct x86_emulate_ops { struct operand { enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; unsigned int bytes; - unsigned long val, orig_val, *ptr; + unsigned long orig_val, *ptr; + union { + unsigned long val; + char valptr[sizeof(unsigned long) + 2]; + }; }; struct fetch_cache { -- cgit From 35aa5375d407ecadcc3adb5cb31d27044bf7f29f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:27 +0300 Subject: KVM: x86 emulator: add (set|get)_dr callbacks to x86_emulate_ops Add (set|get)_dr callbacks to x86_emulate_ops instead of calling them directly. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/include/asm/kvm_host.h | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 69a64a6a36f4..c37296d0e909 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -137,6 +137,8 @@ struct x86_emulate_ops { void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); int (*cpl)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); + int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); }; /* Type, address-of, and value of an instruction's operand. */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 76f5483cffec..97774ae3c874 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -591,10 +591,6 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); -int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, - unsigned long *dest); -int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, - unsigned long value); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -- cgit From 3fb1b5dbd397d16a855c97c3fb80fe6e9196ce7c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:28 +0300 Subject: KVM: x86 emulator: add (set|get)_msr callbacks to x86_emulate_ops Add (set|get)_msr callbacks to x86_emulate_ops instead of calling them directly. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c37296d0e909..f751657be732 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -139,6 +139,8 @@ struct x86_emulate_ops { void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); + int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); + int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); }; /* Type, address-of, and value of an instruction's operand. */ -- cgit From 5951c4423724759906b10a26aa6a8817c4afa615 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:29 +0300 Subject: KVM: x86 emulator: add get_cached_segment_base() callback to x86_emulate_ops On VMX it is expensive to call get_cached_descriptor() just to get segment base since multiple vmcs_reads are done instead of only one. Introduce new call back get_cached_segment_base() for efficiency. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index f751657be732..df53ba2294b6 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -132,6 +132,7 @@ struct x86_emulate_ops { int seg, struct kvm_vcpu *vcpu); u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); + unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); -- cgit From 0f12244fe70e8a94a491f6cd7ed70a352ab6c26c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:31 +0300 Subject: KVM: x86 emulator: make set_cr() callback return error if it fails Make set_cr() callback return error if it fails instead of injecting #GP behind emulator's back. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index df53ba2294b6..6c4f4918db5e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -135,7 +135,7 @@ struct x86_emulate_ops { unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); - void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); + int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); int (*cpl)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); -- cgit From e680080e653b8c8725ca620bf22a5f8480f40cb5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:33 +0300 Subject: KVM: x86 emulator: fix X86EMUL_RETRY_INSTR and X86EMUL_CMPXCHG_FAILED values Currently X86EMUL_PROPAGATE_FAULT, X86EMUL_RETRY_INSTR and X86EMUL_CMPXCHG_FAILED have the same value so caller cannot distinguish why function such as emulator_cmpxchg_emulated() (which can return both X86EMUL_PROPAGATE_FAULT and X86EMUL_CMPXCHG_FAILED) failed. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 6c4f4918db5e..0cf4311db0d6 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -51,8 +51,9 @@ struct x86_emulate_ctxt; #define X86EMUL_UNHANDLEABLE 1 /* Terminate emulation but return success to the caller. */ #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ -#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ -#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ +#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ +#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ + struct x86_emulate_ops { /* * read_std: Read bytes of standard (non-emulated/special) memory. -- cgit From c3cd7ffaf57ae6ead5b394cebaeb76164059a57f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:35 +0300 Subject: KVM: x86 emulator: x86_emulate_insn() return -1 only in case of emulation failure Currently emulator returns -1 when emulation failed or IO is needed. Caller tries to guess whether emulation failed by looking at other variables. Make it easier for caller to recognise error condition by always returning -1 in case of failure. For this new emulator internal return value X86EMUL_IO_NEEDED is introduced. It is used to distinguish between error condition (which returns X86EMUL_UNHANDLEABLE) and condition that requires IO exit to userspace to continue emulation. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0cf4311db0d6..777240d4524e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -53,6 +53,7 @@ struct x86_emulate_ctxt; #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ +#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ struct x86_emulate_ops { /* -- cgit From f181b96d4c769b8915849eb9070c18116fd8d44e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:36 +0300 Subject: KVM: remove export of emulator_write_emulated() It is not called directly outside of the file it's defined in anymore. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 97774ae3c874..2ca1867ed97a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -628,11 +628,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); void fx_init(struct kvm_vcpu *vcpu); -int emulator_write_emulated(unsigned long addr, - const void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu); - void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes, -- cgit From 8fe681e984b6505d4d12125c0776399304803ec7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:37 +0300 Subject: KVM: do not inject #PF in (read|write)_emulated() callbacks Return error to x86 emulator instead of injection exception behind its back. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 777240d4524e..b7e00cb21c6e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -94,6 +94,7 @@ struct x86_emulate_ops { int (*read_emulated)(unsigned long addr, void *val, unsigned int bytes, + unsigned int *error, struct kvm_vcpu *vcpu); /* @@ -106,6 +107,7 @@ struct x86_emulate_ops { int (*write_emulated)(unsigned long addr, const void *val, unsigned int bytes, + unsigned int *error, struct kvm_vcpu *vcpu); /* @@ -120,6 +122,7 @@ struct x86_emulate_ops { const void *old, const void *new, unsigned int bytes, + unsigned int *error, struct kvm_vcpu *vcpu); int (*pio_in_emulated)(int size, unsigned short port, void *val, -- cgit From ef050dc0390176ec6888f373edb776587c88be3d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:40 +0300 Subject: KVM: x86 emulator: set RFLAGS outside x86 emulator code Removes the need for set_flags() callback. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7e00cb21c6e..a87d95f09572 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -142,7 +142,6 @@ struct x86_emulate_ops { ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); int (*cpl)(struct kvm_vcpu *vcpu); - void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); -- cgit From 54b8486f469475d6c8e8aec917b91239a54eb8c8 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 28 Apr 2010 19:15:44 +0300 Subject: KVM: x86 emulator: do not inject exception directly into vcpu Return exception as a result of instruction emulation and handle injection in KVM code. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a87d95f09572..51cfd730ac5d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -216,6 +216,12 @@ struct x86_emulate_ctxt { int interruptibility; bool restart; /* restart string instruction after writeback */ + + int exception; /* exception that happens during emulation or -1 */ + u32 error_code; /* error code for exception */ + bool error_code_valid; + unsigned long cr2; /* faulted address in case of #PF */ + /* decode cache */ struct decode_cache decode; }; -- cgit From eec4b140c924b4c650e9a89e01d223266490e325 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 5 May 2010 16:04:44 +0200 Subject: KVM: SVM: Allow EFER.LMSLE to be set with nested svm This patch enables setting of efer bit 13 which is allowed in all SVM capable processors. This is necessary for the SLES11 version of Xen 4.0 to boot with nested svm. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/msr-index.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8c7ae4318629..509a42187dc2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -20,6 +20,7 @@ #define _EFER_LMA 10 /* Long mode active (read-only) */ #define _EFER_NX 11 /* No execute enable */ #define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ #define EFER_SCE (1<<_EFER_SCE) @@ -27,6 +28,7 @@ #define EFER_LMA (1<<_EFER_LMA) #define EFER_NX (1<<_EFER_NX) #define EFER_SVME (1<<_EFER_SVME) +#define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ -- cgit From 6d77dbfc88e37c9efd5c5dd18445cfe819ae17ea Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 10 May 2010 11:16:56 +0300 Subject: KVM: inject #UD if instruction emulation fails and exit to userspace Do not kill VM when instruction emulation fails. Inject #UD and report failure to userspace instead. Userspace may choose to reenter guest if vcpu is in userspace (cpl == 3) in which case guest OS will kill offending process and continue running. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2ca1867ed97a..0c06148fa3b1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -576,7 +576,6 @@ enum emulation_result { #define EMULTYPE_SKIP (1 << 2) int emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, u16 error_code, int emulation_type); -void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -- cgit From 5ee481da7b62a992b91f958bf26aaaa92354c170 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 17 May 2010 17:22:23 +0800 Subject: x86: Export FPU API for KVM use Also add some constants. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/i387.h | 2 ++ arch/x86/include/asm/xsave.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b904..815c5b2b9f57 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -482,6 +482,8 @@ static inline void fpu_copy(struct fpu *dst, struct fpu *src) memcpy(dst->state, src->state, xstate_size); } +extern void fpu_finit(struct fpu *fpu); + #endif /* __ASSEMBLY__ */ #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae228..29ee4e4c64cf 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -13,6 +13,9 @@ #define FXSAVE_SIZE 512 +#define XSTATE_YMM_SIZE 256 +#define XSTATE_YMM_OFFSET (512 + 64) + /* * These are the features that the OS can handle currently. */ -- cgit From 7cf30855e02be7a207ffebb8b9350986f2ba83e9 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 17 May 2010 17:08:27 +0800 Subject: KVM: x86: Use unlazy_fpu() for host FPU We can avoid unnecessary fpu load when userspace process didn't use FPU frequently. Derived from Avi's idea. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0c06148fa3b1..d93601c52902 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -301,7 +301,6 @@ struct kvm_vcpu_arch { unsigned long mmu_seq; } update_pte; - struct i387_fxsave_struct host_fx_image; struct i387_fxsave_struct guest_fx_image; gva_t mmio_fault_cr2; -- cgit From 98918833a3e21ffc5619535955e7a003cb788163 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 17 May 2010 17:08:28 +0800 Subject: KVM: x86: Use FPU API Convert KVM to use generic FPU API. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d93601c52902..d08bb4a202de 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -301,7 +301,7 @@ struct kvm_vcpu_arch { unsigned long mmu_seq; } update_pte; - struct i387_fxsave_struct guest_fx_image; + struct fpu guest_fpu; gva_t mmio_fault_cr2; struct kvm_pio_request pio; @@ -708,21 +708,6 @@ static inline unsigned long read_msr(unsigned long msr) } #endif -static inline void kvm_fx_save(struct i387_fxsave_struct *image) -{ - asm("fxsave (%0)":: "r" (image)); -} - -static inline void kvm_fx_restore(struct i387_fxsave_struct *image) -{ - asm("fxrstor (%0)":: "r" (image)); -} - -static inline void kvm_fx_finit(void) -{ - asm("finit"); -} - static inline u32 get_rdx_init_val(void) { return 0x600; /* P6 family */ -- cgit From c8174f7b35b3018c4c7b3237ed1c792e454fd5c3 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Mon, 24 May 2010 01:01:04 +0300 Subject: KVM: VMX: Add constant for invalid guest state exit reason For the sake of completeness, this patch adds a symbolic constant for VMX exit reason 0x21 (invalid guest state). Signed-off-by: Mohammed Gamal Signed-off-by: Avi Kivity --- arch/x86/include/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9e6779f7cf2d..104cf86a7562 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -257,6 +257,7 @@ enum vmcs_field { #define EXIT_REASON_IO_INSTRUCTION 30 #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 #define EXIT_REASON_MWAIT_INSTRUCTION 36 #define EXIT_REASON_MONITOR_INSTRUCTION 39 #define EXIT_REASON_PAUSE_INSTRUCTION 40 -- cgit From 10ab25cd6bf7ee4e5a55d81f203f7dc1a855c27e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 25 May 2010 16:01:50 +0200 Subject: KVM: x86: Propagate fpu_alloc errors Memory allocation may fail. Propagate such errors. Signed-off-by: Jan Kiszka Reviewed-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d08bb4a202de..0cd0f2923af5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -624,7 +624,7 @@ int kvm_pic_set_irq(void *opaque, int irq, int level); void kvm_inject_nmi(struct kvm_vcpu *vcpu); -void fx_init(struct kvm_vcpu *vcpu); +int fx_init(struct kvm_vcpu *vcpu); void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, -- cgit From 518c8aee5ca74fc03273fc6b4893cf456d65d545 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Fri, 4 Jun 2010 08:51:39 +0800 Subject: KVM: VMX: Make sure single type invvpid is supported before issuing invvpid instruction According to SDM, we need check whether single-context INVVPID type is supported before issuing invvpid instruction. Signed-off-by: Gui Jianfeng Reviewed-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 104cf86a7562..b4e28400c9ff 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -376,6 +376,8 @@ enum vmcs_field { #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) +#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ + #define VMX_EPT_DEFAULT_GAW 3 #define VMX_EPT_MAX_GAW 0x4 #define VMX_EPT_MT_EPTE_SHIFT 3 -- cgit From b9d762fa79f541ab480cdb733b46fdb0b4471c2d Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Mon, 7 Jun 2010 10:32:29 +0800 Subject: KVM: VMX: Add all-context INVVPID type support Add all-context INVVPID type support. Signed-off-by: Gui Jianfeng Signed-off-by: Avi Kivity --- arch/x86/include/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b4e28400c9ff..96a5886d384e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -377,6 +377,7 @@ enum vmcs_field { #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ +#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ #define VMX_EPT_DEFAULT_GAW 3 #define VMX_EPT_MAX_GAW 0x4 -- cgit From 2acf923e38fb6a4ce0c57115decbb38d334902ac Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 10 Jun 2010 11:27:12 +0800 Subject: KVM: VMX: Enable XSAVE/XRSTOR for guest This patch enable guest to use XSAVE/XRSTOR instructions. We assume that host_xcr0 would use all possible bits that OS supported. And we loaded xcr0 in the same way we handled fpu - do it as late as we can. Signed-off-by: Dexuan Cui Signed-off-by: Sheng Yang Reviewed-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/asm/vmx.h | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0cd0f2923af5..91631b8b2090 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -302,6 +302,7 @@ struct kvm_vcpu_arch { } update_pte; struct fpu guest_fpu; + u64 xcr0; gva_t mmio_fault_cr2; struct kvm_pio_request pio; @@ -605,6 +606,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); +int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 96a5886d384e..9f0cbd987d50 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -267,6 +267,7 @@ enum vmcs_field { #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 /* * Interruption-information format -- cgit From 49a9b07edcf4aff159c1f3d3a27e58cf38bc27cd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 10 Jun 2010 17:02:14 +0300 Subject: KVM: Fix mov cr0 #GP at wrong instruction On Intel, we call skip_emulated_instruction() even if we injected a #GP, resulting in the #GP pointing at the wrong address. Fix by injecting the exception and skipping the instruction at the same place, so we can do just one or the other. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 91631b8b2090..b23708450210 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -597,7 +597,7 @@ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, bool has_error_code, u32 error_code); -void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); -- cgit From a83b29c6ad6d6497e569edbc29e556a384cebddd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 10 Jun 2010 17:02:15 +0300 Subject: KVM: Fix mov cr4 #GP at wrong instruction On Intel, we call skip_emulated_instruction() even if we injected a #GP, resulting in the #GP pointing at the wrong address. Fix by injecting the exception and skipping the instruction at the same place, so we can do just one or the other. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b23708450210..ea8c319cdffc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -599,7 +599,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); -void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); -- cgit From 2390218b6aa2eb3784b0a82fa811c19097dc793a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 10 Jun 2010 17:02:16 +0300 Subject: KVM: Fix mov cr3 #GP at wrong instruction On Intel, we call skip_emulated_instruction() even if we injected a #GP, resulting in the #GP pointing at the wrong address. Fix by injecting the exception and skipping the instruction at the same place, so we can do just one or the other. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ea8c319cdffc..c2813d658f3e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -598,7 +598,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, bool has_error_code, u32 error_code); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); -- cgit From 2d5b5a665508c60577c1088e0405850a965b6795 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Sun, 13 Jun 2010 17:29:39 +0800 Subject: KVM: x86: XSAVE/XRSTOR live migration support This patch enable save/restore of xsave state. Signed-off-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm.h | 22 ++++++++++++++++++++++ arch/x86/include/asm/xsave.h | 7 +++++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ff90055c7f0b..4d8dcbdfc120 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -22,6 +22,8 @@ #define __KVM_HAVE_XEN_HVM #define __KVM_HAVE_VCPU_EVENTS #define __KVM_HAVE_DEBUGREGS +#define __KVM_HAVE_XSAVE +#define __KVM_HAVE_XCRS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -299,4 +301,24 @@ struct kvm_debugregs { __u64 reserved[9]; }; +/* for KVM_CAP_XSAVE */ +struct kvm_xsave { + __u32 region[1024]; +}; + +#define KVM_MAX_XCRS 16 + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 29ee4e4c64cf..32c36668fa7b 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -13,8 +13,11 @@ #define FXSAVE_SIZE 512 -#define XSTATE_YMM_SIZE 256 -#define XSTATE_YMM_OFFSET (512 + 64) +#define XSAVE_HDR_SIZE 64 +#define XSAVE_HDR_OFFSET FXSAVE_SIZE + +#define XSAVE_YMM_SIZE 256 +#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) /* * These are the features that the OS can handle currently. -- cgit From be71e061d15c0aad4f8c2606f76c57b8a19792fd Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 11 Jun 2010 21:31:38 +0800 Subject: KVM: MMU: don't mark pte notrap if it's just sync transient If the sync-sp just sync transient, don't mark its pte notrap Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c2813d658f3e..2ec2e27a403e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -241,7 +241,7 @@ struct kvm_mmu { void (*prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page); int (*sync_page)(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp); + struct kvm_mmu_page *sp, bool clear_unsync); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); hpa_t root_hpa; int root_level; -- cgit From a1f4d39500ad8ed61825eff061debff42386ab5b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 21 Jun 2010 11:44:20 +0300 Subject: KVM: Remove memory alias support As advertised in feature-removal-schedule.txt. Equivalent support is provided by overlapping memory regions. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2ec2e27a403e..a57cdeacc4d2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -69,8 +69,6 @@ #define IOPL_SHIFT 12 -#define KVM_ALIAS_SLOTS 4 - #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 #define KVM_MMU_HASH_SHIFT 10 @@ -362,24 +360,7 @@ struct kvm_vcpu_arch { u64 hv_vapic; }; -struct kvm_mem_alias { - gfn_t base_gfn; - unsigned long npages; - gfn_t target_gfn; -#define KVM_ALIAS_INVALID 1UL - unsigned long flags; -}; - -#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION - -struct kvm_mem_aliases { - struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; - int naliases; -}; - struct kvm_arch { - struct kvm_mem_aliases *aliases; - unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_alloc_mmu_pages; @@ -655,8 +636,6 @@ void kvm_disable_tdp(void); int complete_pio(struct kvm_vcpu *vcpu); bool kvm_check_iopl(struct kvm_vcpu *vcpu); -struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); - static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); -- cgit From f5f48ee15c2ee3e44cf429e34b16c6fa9b900246 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 30 Jun 2010 12:25:15 +0800 Subject: KVM: VMX: Execute WBINVD to keep data consistency with assigned devices Some guest device driver may leverage the "Non-Snoop" I/O, and explicitly WBINVD or CLFLUSH to a RAM space. Since migration may occur before WBINVD or CLFLUSH, we need to maintain data consistency either by: 1: flushing cache (wbinvd) when the guest is scheduled out if there is no wbinvd exit, or 2: execute wbinvd on all dirty physical CPUs when guest wbinvd exits. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a57cdeacc4d2..2bda62485c4c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -358,6 +359,8 @@ struct kvm_vcpu_arch { /* fields used by HYPER-V emulation */ u64 hv_vapic; + + cpumask_var_t wbinvd_dirty_mask; }; struct kvm_arch { @@ -514,6 +517,8 @@ struct kvm_x86_ops { void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); + bool (*has_wbinvd_exit)(void); + const struct trace_print_flags *exit_reasons_str; }; @@ -571,6 +576,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -- cgit From 828554136bbacae6e39fc31b9cd7e7c660ad7530 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 1 Jul 2010 16:00:11 +0200 Subject: KVM: Remove unnecessary divide operations This patch converts unnecessary divide and modulo operations in the KVM large page related code into logical operations. This allows to convert gfn_t to u64 while not breaking 32 bit builds. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2bda62485c4c..50c79b9f5c38 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -44,7 +44,8 @@ /* KVM Hugepage definitions for x86 */ #define KVM_NR_PAGE_SIZES 3 -#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) +#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) +#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -- cgit From dd180b3e90253cb4ca95d603a8c17413f8daec69 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sat, 3 Jul 2010 16:02:42 +0800 Subject: KVM: VMX: fix tlb flush with invalid root Commit 341d9b535b6c simplify reload logic while entry guest mode, it can avoid unnecessary sync-root if KVM_REQ_MMU_RELOAD and KVM_REQ_MMU_SYNC both set. But, it cause a issue that when we handle 'KVM_REQ_TLB_FLUSH', the root is invalid, it is triggered during my test: Kernel BUG at ffffffffa00212b8 [verbose debug info unavailable] ...... Fixed by directly return if the root is not ready. Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 50c79b9f5c38..502e53f999cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -40,6 +40,8 @@ 0xFFFFFF0000000000ULL) #define INVALID_PAGE (~(hpa_t)0) +#define VALID_PAGE(x) ((x) != INVALID_PAGE) + #define UNMAPPED_GVA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ -- cgit From e8c534ec068af1a0845aceda373a9bfd2de62030 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 27 Jul 2010 18:53:35 +0200 Subject: x86: Fix keeping track of AMD C1E Accomodate the original C1E-aware idle routine to the different times during boot when the BIOS enables C1E. While at it, remove the synthetic CPUID flag in favor of a single global setting which denotes C1E status on the system. [ hpa: changed c1e_enabled to be a bool; clarified cpu bit 3:21 comment ] Signed-off-by: Michal Schmidt LKML-Reference: <20100727165335.GA11630@aftab> Signed-off-by: Borislav Petkov Signed-off-by: H. Peter Anvin Acked-by: Thomas Gleixner --- arch/x86/include/asm/acpi.h | 2 +- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/processor.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index aa2c39d968fc..92091de11113 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -134,7 +134,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) boot_cpu_data.x86_model <= 0x05 && boot_cpu_data.x86_mask < 0x0A) return 1; - else if (boot_cpu_has(X86_FEATURE_AMDC1E)) + else if (c1e_detected) return 1; else return max_cstate; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 817aa316b180..0b205b8a4308 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -89,7 +89,7 @@ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ #define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ + /* 21 available, was AMD_C1E */ #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d85637bb9505..325b7bdbebaa 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -762,6 +762,7 @@ extern void init_c1e_mask(void); extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; extern unsigned long idle_nomwait; +extern bool c1e_detected; /* * on systems with caches, caches must be flashed as the absolute -- cgit From 35f2915c3bd0cd6950bdd9d461de565e8feae852 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 1 Jun 2010 13:07:34 +0100 Subject: intel_scu_ipc: add definitions for vRTC related command Signed-off-by: Feng Tang Signed-off-by: Alan Cox Signed-off-by: Matthew Garrett --- arch/x86/include/asm/intel_scu_ipc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 4470c9ad4a3e..03200452069b 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -1,6 +1,12 @@ #ifndef _ASM_X86_INTEL_SCU_IPC_H_ #define _ASM_X86_INTEL_SCU_IPC_H_ +#define IPCMSG_VRTC 0xFA /* Set vRTC device */ + +/* Command id associated with message IPCMSG_VRTC */ +#define IPC_CMD_VRTC_SETTIME 1 /* Set time */ +#define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ + /* Read single register */ int intel_scu_ipc_ioread8(u16 addr, u8 *data); -- cgit From 804f8681a99da2aa49bd7f0dab3750848d1ab1bc Mon Sep 17 00:00:00 2001 From: Sreedhara DS Date: Mon, 26 Jul 2010 10:03:10 +0100 Subject: Remove indirect read write api support. The firmware of production devices does not support this interface so this is dead code. Signed-off-by: Sreedhara DS Signed-off-by: Alan Cox Signed-off-by: Matthew Garrett --- arch/x86/include/asm/intel_scu_ipc.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 03200452069b..29f66793cc55 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -34,20 +34,6 @@ int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); /* Update single register based on the mask */ int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); -/* - * Indirect register read - * Can be used when SCCB(System Controller Configuration Block) register - * HRIM(Honor Restricted IPC Messages) is set (bit 23) - */ -int intel_scu_ipc_register_read(u32 addr, u32 *data); - -/* - * Indirect register write - * Can be used when SCCB(System Controller Configuration Block) register - * HRIM(Honor Restricted IPC Messages) is set (bit 23) - */ -int intel_scu_ipc_register_write(u32 addr, u32 data); - /* Issue commands to the SCU with or without data */ int intel_scu_ipc_simple_command(int cmd, int sub); int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, -- cgit From 12bfa3de63504d879ae427ec1f2884fc46556157 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 5 Aug 2010 09:22:20 -0500 Subject: kgdb,x86: Individual register get/set for x86 Implement the ability to individually get and set registers for kdb and kgdb for x86. Signed-off-by: Jason Wessel Acked-by: H. Peter Anvin CC: Ingo Molnar CC: x86@kernel.org --- arch/x86/include/asm/kgdb.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index 006da3687cdc..396f5b5fc4d7 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h @@ -39,9 +39,11 @@ enum regnames { GDB_FS, /* 14 */ GDB_GS, /* 15 */ }; +#define GDB_ORIG_AX 41 +#define DBG_MAX_REG_NUM 16 #define NUMREGBYTES ((GDB_GS+1)*4) #else /* ! CONFIG_X86_32 */ -enum regnames64 { +enum regnames { GDB_AX, /* 0 */ GDB_BX, /* 1 */ GDB_CX, /* 2 */ @@ -59,15 +61,15 @@ enum regnames64 { GDB_R14, /* 14 */ GDB_R15, /* 15 */ GDB_PC, /* 16 */ + GDB_PS, /* 17 */ + GDB_CS, /* 18 */ + GDB_SS, /* 19 */ }; - -enum regnames32 { - GDB_PS = 34, - GDB_CS, - GDB_SS, -}; -#define NUMREGBYTES ((GDB_SS+1)*4) -#endif /* CONFIG_X86_32 */ +#define GDB_ORIG_AX 57 +#define DBG_MAX_REG_NUM 20 +/* 17 64 bit regs and 3 32 bit regs */ +#define NUMREGBYTES ((17 * 8) + (3 * 4)) +#endif /* ! CONFIG_X86_32 */ static inline void arch_kgdb_breakpoint(void) { -- cgit From 7e005f79791dcd58436c88ded4a7f5aed1b82147 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 31 May 2010 15:59:04 +0900 Subject: remove needless ISA_DMA_THRESHOLD Architectures don't need to define ISA_DMA_THRESHOLD anymore. Signed-off-by: FUJITA Tomonori Acked-by: James Bottomley Acked-by: David Howells Signed-off-by: Jens Axboe --- arch/x86/include/asm/scatterlist.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index fb0b1874396f..4240878b9d76 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h @@ -3,7 +3,6 @@ #include -#define ISA_DMA_THRESHOLD (0x00ffffff) #define ARCH_HAS_SG_CHAIN #endif /* _ASM_X86_SCATTERLIST_H */ -- cgit From 597781f3e51f48ef8e67be772196d9e9673752c4 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Mon, 9 Aug 2010 17:18:32 -0700 Subject: kmap_atomic: make kunmap_atomic() harder to misuse kunmap_atomic() is currently at level -4 on Rusty's "Hard To Misuse" list[1] ("Follow common convention and you'll get it wrong"), except in some architectures when CONFIG_DEBUG_HIGHMEM is set[2][3]. kunmap() takes a pointer to a struct page; kunmap_atomic(), however, takes takes a pointer to within the page itself. This seems to once in a while trip people up (the convention they are following is the one from kunmap()). Make it much harder to misuse, by moving it to level 9 on Rusty's list[4] ("The compiler/linker won't let you get it wrong"). This is done by refusing to build if the type of its first argument is a pointer to a struct page. The real kunmap_atomic() is renamed to kunmap_atomic_notypecheck() (which is what you would call in case for some strange reason calling it with a pointer to a struct page is not incorrect in your code). The previous version of this patch was compile tested on x86-64. [1] http://ozlabs.org/~rusty/index.cgi/tech/2008-04-01.html [2] In these cases, it is at level 5, "Do it right or it will always break at runtime." [3] At least mips and powerpc look very similar, and sparc also seems to share a common ancestor with both; there seems to be quite some degree of copy-and-paste coding here. The include/asm/highmem.h file for these three archs mention x86 CPUs at its top. [4] http://ozlabs.org/~rusty/index.cgi/tech/2008-03-30.html [5] As an aside, could someone tell me why mn10300 uses unsigned long as the first parameter of kunmap_atomic() instead of void *? Signed-off-by: Cesar Eduardo Barros Cc: Russell King (arch/arm) Cc: Ralf Baechle (arch/mips) Cc: David Howells (arch/frv, arch/mn10300) Cc: Koichi Yasutake (arch/mn10300) Cc: Kyle McMartin (arch/parisc) Cc: Helge Deller (arch/parisc) Cc: "James E.J. Bottomley" (arch/parisc) Cc: Benjamin Herrenschmidt (arch/powerpc) Cc: Paul Mackerras (arch/powerpc) Cc: "David S. Miller" (arch/sparc) Cc: Thomas Gleixner (arch/x86) Cc: Ingo Molnar (arch/x86) Cc: "H. Peter Anvin" (arch/x86) Cc: Arnd Bergmann (include/asm-generic) Cc: Rusty Russell ("Hard To Misuse" list) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a726650fc80f..8caac76ac324 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -61,7 +61,7 @@ void *kmap(struct page *page); void kunmap(struct page *page); void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); -void kunmap_atomic(void *kvaddr, enum km_type type); +void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type); void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); struct page *kmap_atomic_to_page(void *ptr); -- cgit From 4e60c86bd9e5a7110ed28874d0b6592186550ae8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 9 Aug 2010 17:19:03 -0700 Subject: gcc-4.6: mm: fix unused but set warnings No real bugs, just some dead code and some fixups. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 181be528c612..076052cd62be 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -126,8 +126,8 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* x86-64 always has all page tables mapped. */ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) /* NOP */ -#define pte_unmap_nested(pte) /* NOP */ +#define pte_unmap(pte) ((void)(pte))/* NOP */ +#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */ #define update_mmu_cache(vma, address, ptep) do { } while (0) -- cgit From 8fd49936a8cac246fc9ed85508556c82cd44cf68 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Aug 2010 15:37:41 +0900 Subject: x86: Document __phys_reloc_hide() usage in __pa_symbol() Until all supported versions of gcc recognize -fno-strict-overflow, we should keep the RELOC_HIDE() magic in __pa_symbol(). Comment it. Signed-off-by: Namhyung Kim LKML-Reference: <1281508661-29507-1-git-send-email-namhyung@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 625c3f0e741a..8ca82839288a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -37,6 +37,13 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, #define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) /* __pa_symbol should be used for C visible symbols. This seems to be the official gcc blessed way to do such arithmetic. */ +/* + * We need __phys_reloc_hide() here because gcc may assume that there is no + * overflow during __pa() calculation and can optimize it unexpectedly. + * Newer versions of gcc provide -fno-strict-overflow switch to handle this + * case properly. Once all supported versions of gcc understand it, we can + * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) + */ #define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -- cgit From 4565f0170dfc849b3629c27d769db800467baa62 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Aug 2010 18:03:22 -0700 Subject: dma-mapping: unify dma_get_cache_alignment implementations dma_get_cache_alignment returns the minimum DMA alignment. Architectures defines it as ARCH_DMA_MINALIGN (formally ARCH_KMALLOC_MINALIGN). So we can unify dma_get_cache_alignment implementations. Note that some architectures implement dma_get_cache_alignment wrongly. dma_get_cache_alignment() should return the minimum DMA alignment. So fully-coherent architectures should return 1. This patch also fixes this issue. Signed-off-by: FUJITA Tomonori Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/dma-mapping.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ac91eed21061..f9c67e83f648 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -87,13 +87,6 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t size, flush_write_buffers(); } -static inline int dma_get_cache_alignment(void) -{ - /* no easy way to get cache size on all x86, so return the - * maximum possible, to be safe */ - return boot_cpu_data.x86_clflush_size; -} - static inline unsigned long dma_alloc_coherent_mask(struct device *dev, gfp_t gfp) { -- cgit From 3b9c6c11f519718d618f5d7c9508daf78b207f6f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Aug 2010 18:03:25 -0700 Subject: dma-mapping: remove dma_is_consistent API Architectures implement dma_is_consistent() in different ways (some misinterpret the definition of API in DMA-API.txt). So it hasn't been so useful for drivers. We have only one user of the API in tree. Unlikely out-of-tree drivers use the API. Even if we fix dma_is_consistent() in some architectures, it doesn't look useful at all. It was invented long ago for some old systems that can't allocate coherent memory at all. It's better to export only APIs that are definitely necessary for drivers. Let's remove this API. Signed-off-by: FUJITA Tomonori Cc: James Bottomley Reviewed-by: Konrad Rzeszutek Wilk Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/dma-mapping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index f9c67e83f648..d4c419f883a0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -54,7 +54,6 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d, h) (1) extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); -- cgit From 1d6225e8cc5598f2bc5c992f9c88b1137763e8e1 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Mon, 9 Aug 2010 16:11:22 -0500 Subject: x86, UV: Make kdump avoid stack dumps - fix !CONFIG_KEXEC breakage This replaces Version 1 of this patch, which broke the build when CONFIG_KEXEC and CONFIG_CRASH_DUMP were configured off. In that case the storage for the 'in_crash_kexec' flag was never built. This version defines that flag as 0 if CONFIG_KEXEC is not set. The patch is tested with all combinations of those two options. Signed-off-by: Cliff Wickman Cc: Andrew Morton LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/kdebug.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 7a2910b3512b..5bdfca86581b 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,6 +33,11 @@ extern void __show_regs(struct pt_regs *regs, int all); extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); +#ifdef CONFIG_KEXEC extern int in_crash_kexec; +#else +/* no crash dump is ever in progress if no crash kernel can be kexec'd */ +#define in_crash_kexec 0 +#endif #endif /* _ASM_X86_KDEBUG_H */ -- cgit From c7887325230aec47d47a32562a6e26014a0fafca Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Aug 2010 11:26:22 +0100 Subject: Mark arguments to certain syscalls as being const Mark arguments to certain system calls as being const where they should be but aren't. The list includes: (*) The filename arguments of various stat syscalls, execve(), various utimes syscalls and some mount syscalls. (*) The filename arguments of some syscall helpers relating to the above. (*) The buffer argument of various write syscalls. Signed-off-by: David Howells Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- arch/x86/include/asm/sys_ia32.h | 12 ++++++------ arch/x86/include/asm/syscalls.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index cf4e2e381cba..cb238526a9f1 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -18,13 +18,13 @@ #include /* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); +asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); -asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); +asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, char __user *, +asmlinkage long sys32_fstatat(unsigned int, const char __user *, struct stat64 __user *, int); struct mmap_arg_struct32; asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); @@ -49,12 +49,12 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); +asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, +asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *, compat_uptr_t __user *, struct pt_regs *); asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 5c044b43e9a7..feb2ff9bfc2d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,7 +23,7 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); -long sys_execve(char __user *, char __user * __user *, +long sys_execve(const char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); -- cgit From bf56fba6703592149e1bcd19220c60eb42dff9b7 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 14 Aug 2010 10:20:19 +0200 Subject: archs: replace unifdef-y with header-y unifdef-y and header-y have same semantic, so drop unifdef-y Signed-off-by: Sam Ravnborg --- arch/x86/include/asm/Kbuild | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 493092efaa3b..6fa90a845e4c 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -3,24 +3,23 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += bootparam.h header-y += debugreg.h +header-y += e820.h +header-y += hw_breakpoint.h +header-y += hyperv.h +header-y += ist.h header-y += ldt.h +header-y += mce.h header-y += msr-index.h +header-y += msr.h +header-y += mtrr.h +header-y += posix_types_32.h +header-y += posix_types_64.h header-y += prctl.h +header-y += processor-flags.h header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h -header-y += processor-flags.h -header-y += hw_breakpoint.h -header-y += hyperv.h - -unifdef-y += e820.h -unifdef-y += ist.h -unifdef-y += mce.h -unifdef-y += msr.h -unifdef-y += mtrr.h -unifdef-y += posix_types_32.h -unifdef-y += posix_types_64.h -unifdef-y += unistd_32.h -unifdef-y += unistd_64.h -unifdef-y += vm86.h -unifdef-y += vsyscall.h +header-y += unistd_32.h +header-y += unistd_64.h +header-y += vm86.h +header-y += vsyscall.h -- cgit