Index: linux.prev/Documentation/DocBook/Makefile =================================================================== --- linux.prev.orig/Documentation/DocBook/Makefile +++ linux.prev/Documentation/DocBook/Makefile @@ -10,7 +10,8 @@ DOCBOOKS := wanbook.xml z8530book.xml mc kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ procfs-guide.xml writing_usb_driver.xml \ sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \ - gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml + gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ + genericirq.xml ### # The build process is as follows (targets): Index: linux.prev/Documentation/DocBook/genericirq.tmpl =================================================================== --- /dev/null +++ linux.prev/Documentation/DocBook/genericirq.tmpl @@ -0,0 +1,560 @@ + + + + + + Linux generic IRQ handling + + + + Thomas + Gleixner + +
+ tglx@linutronix.de +
+
+
+ + Ingo + Molnar + +
+ mingo@elte.hu +
+
+
+
+ + + 2005 + Thomas Gleixner + + + 2005 + Ingo Molnar + + + + + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License version 2 as published by the Free Software Foundation. + + + + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + + + + For more details see the file COPYING in the source + distribution of Linux. + + +
+ + + + + Introduction + + The generic interrupt handling layer is designed to provide a + complete abstraction of interrupt handling for device drivers + and is able to handle all different types of interrupt controller + hardware. Device drivers use generic API function to request, enable, + disable and free interrupts. The drivers do not have to know anything + about interrupt hardware, so they can be used on different hardware + platforms without code changes. + + + This documentation is provided for developers who want to implement + architecture interrupt support based on the Generic IRQ handling layer. + + + + + Rationale + + The original implementation of interrupt handling in Linux is using + the __do_IRQ() super-handler, which must be able to deal with every + type of interrupt logic. This is achieved by an 'interrupt type' + structure and runtime flags to handle special cases. + Furthermore the superhandler assumed a certain type of interrupt + handling hardware and turned out to be not capable of handling all + kind of interrupt controller hardware which can be found through + the architectures. The all in one approach also adds unnecessary + complexity for every user. + + + Originally, Russell King identified different types of handlers to + build a quite universal set for the ARM interrupt handler + implementation in Linux 2.5/2.6. He distiguished between: + + Level type + Edge type + Simple type + + In the SMP world of the __do_IRQ() super-handler another type + was identified: + + Per CPU type + + + + This split implementation of handlers allows to optimize the flow + of the interrupt handling for each specific interrupt type. + This reduces complexitiy in that particular code path and allows + the optimized handling of a given type. + + + The original general implementation uses interrupt_type structures + to differentiate the flow control in the super-handler. This + leads to a mix of flow logic and code related to hardware details. 
+ Russell King's ARM implementation which replaced the type by a chip + abstraction did the mix the other way around. + + + The natural conclusion was a clean separation of the 'type flow' + and the 'chip'. Analysing a couple of architecture implementations + reveals that many of them can use a generic set of 'type flow' + implementations and only need to add the chip level specific code. + The separation is also valuable for the (sub)architectures, + which need specific quirks in the type flow itself, because it + provides a more transparent design. + + + Each interrupt type implementation has assigned its own flow + handler, which should be normally one of the generic + implementations. The flow handler implementation makes it + simple to provide demultiplexing handlers which can be found in + embedded platforms on various architectures. + + + The separation makes the generic interrupt handling more flexible + and extensible. A (sub)architecture can use a generic type flow + implementation for e.g. 'level type' interrupts and add a + (sub)architecture specific 'edge type' implementation. + + + To make the transition to the new model easier and prevent the + breakage of existing implementations the __do_IRQ() super-handler + is still available. This leads to a kind of duality for the time + being. Over time the new model should achieve a homogeneous + implementation scheme over all architectures with enhanced + maintainability and cleanliness. + + + + Known Bugs And Assumptions + + None (hopefully). + + + + + Abstraction layers + + There are three main levels of abstraction in the interrupt code: + + Highlevel driver API + Abstract interrupt type + Chiplevel hardware encapsulation + + + + The separation of interrupt type and chip level functionality + provides the most flexible design. This implementation can handle + all kinds of interrupt hardware and the necessary workarounds for + the interrupt types without the need of redundant implementations. 
+ The separation also handles edge and level type interrupts + on the same hardware chip. + + + Interrupt control flow + + Each interrupt is described by an interrupt description structure + irq_desc. The interrupt is referenced by an 'unsigned int' numeric + value which selects the corresponding interrupt description structure + in the description structures array. + The description structure contains status information and pointers + to the interrupt type structure and the interrupt chip structure + which are assigned to this interrupt. + + + Whenever an interrupt triggers, the lowlevel arch code calls into + the generic interrupt code by calling desc->handler->handle_irq(). + This highlevel IRQ handling function only uses other + desc->handler primitives which describe the control flow operation + necessary for the interrupt type. These operations are calling + the chip primitives referenced by the assigned chip description + structure. + + + + Highlevel Driver API + + The highlevel Driver API consists of the following functions: + + request_irq() + free_irq() + disable_irq() + enable_irq() + disable_irq_nosync() (SMP only) + synchronize_irq() (SMP only) + set_irq_type() + set_irq_wake() + set_irq_data() + set_irq_chip() + set_irq_chip_data() + + See the autogenerated function documentation for details. + + + + Abstract interrupt type + + The 'interrupt type' (struct irq_type) abstraction mainly consists of + methods which implement the 'interrupt handling flow'. The generic + layer provides a set of pre-defined types: + + default_level_type + default_edge_type + default_simple_type + default_percpu_type + + The default type implementations use the generic type handlers. + + handle_level_type + handle_edge_type + handle_simple_type + handle_percpu_type + + The interrupt types (either predefined or architecture specific) are + assigned to specific interrupts by the architecture either during + bootup or during device initialization. 
+ + + Default type implementations + + Helper functions + + The helper functions call the chip primitives and + are used by the default type implementations. + Following helper functions are implemented (simplified excerpt): + +default_enable(irq) +{ + desc->chip->unmask(irq); +} + +default_disable(irq) +{ + desc->chip->mask(irq); +} + +default_ack(irq) +{ + chip->ack(irq); +} + +default_mask_ack(irq) +{ + if (chip->mask_ack) { + chip->mask_ack(irq); + } else { + chip->mask(irq); + chip->ack(irq); + } +} + +noop(irq) +{ +} + +default_set_type(irq, type) +{ + if (desc->chip->set_type) { + if (desc->chip->set_type(irq, type)) + return NULL; + } + + return default_handler for type; +} + + + + + Default Level IRQ type + + The default Level IRQ type implements the functions + + enabledefault_enable + disabledefault_disable + startdefault_mask_ack + enddefault_enable + handle_irqhandle_level_irq + set_typedefault_set_type + + + + + Default Edge IRQ type + + The default Edge IRQ type implements the functions + + enabledefault_enable + disabledefault_disable + startdefault_ack + holddefault_mask_ack + endnoop + handle_irqhandle_edge_irq + set_typedefault_set_type + + + + + Default simple IRQ type + + The default simple IRQ type implements the functions + + enablenoop + disablenoop + handle_irqhandle_simple_irq + + + + + Default per CPU IRQ type + + The default per CPU IRQ type implements the functions + + enabledefault_enable + disabledefault_disable + startdefault_ack + enddefault_enable + handle_irqhandle_percpu_irq + + + + + + Default type handler implementations + + Default Level IRQ type handler + + handle_level_type provides a generic implementation + for level type interrupts. + + + Following control flow is implemented (simplified excerpt): + +desc->handler->start(); +handle_IRQ_event(desc->action); +desc->handler->end(); + + + + + Default Edge IRQ type handler + + handle_edge_type provides a generic implementation + for edge type interrupts. 
+ + + Following control flow is implemented (simplified excerpt): + +if (desc->status & running) { + desc->handler->hold(); + desc->status |= pending | masked; + return; +} +desc->handler->start(); +desc->status |= running; +do { + if (desc->status & masked) + desc->handler->enable(); + desc->status &= ~pending; + handle_IRQ_event(desc->action); +} while (desc->status & pending); +desc->status &= ~running; +desc->handler->end(); + + + + + Default simple IRQ type handler + + handle_simple_type provides a generic implementation + for simple type interrupts. + + + Note: The simple type handler does not call any + handler/chip primitives. + + + Following control flow is implemented (simplified excerpt): + +handle_IRQ_event(desc->action); + + + + + Default per CPU type handler + + handle_percpu_type provides a generic implementation + for per CPU type interrupts. + + + Per CPU interrupts are only available on SMP and + the handler provides a simplified version without + locking. + + + Following control flow is implemented (simplified excerpt): + +desc->handler->start(); +handle_IRQ_event(desc->action); +desc->handler->end(); + + + + + + Architecture specific type implementation + + If an architecture needs to implement its own type structures, then + the following primitives have to be implemented: + + handle_irq() - The handle_irq function pointer should preferably point to + one of the generic type handler functions + startup() - Optional + shutdown() - Optional + enable() + disable() + start() + hold() - For edge type interrupts only + end() + set_type - Optional + set_affinity - SMP only + + + + + Quirks and optimizations + + The generic functions are intended for 'clean' architectures and chips, + which have no platform-specific IRQ handling quirks. If an architecture + needs to implement quirks on the 'flow' level then it can do so by + overriding the irqtype. This is also done for compatibility reasons, as + most architectures use irqtypes only at the moment. 
+ + + An architecture could implement all of its IRQ logic via pushing + chip handling details into the irqtype's ->start()/->end()/->hold() + functions. This is only recommended when the underlying primitives + are pure chip primitives without additional quirks. The direct pointer + to the chip functions reduces the indirection level by one. + + + + + Chiplevel hardware encapsulation + + The chip level hardware description structure irq_chip + contains all the direct chip relevant functions, which + can be utilized by the irq_type implementations. + + ack() + mask_ack() - Optional, recommended for performance + mask() + unmask() + retrigger() - Optional + set_type() - Optional + set_wake() - Optional + + These primitives are strictly intended to mean what they say: ack means + ACK, masking means masking of an IRQ line, etc. It is up to the flow + handler(s) to use these basic units of lowlevel functionality. + + + + + + __do_IRQ entry point + + The original implementation __do_IRQ() is an alternative entry + point for all types of interrupts. + + + This handler turned out to be not suitable for all + interrupt hardware and was therefore reimplemented with split + functionality for edge/level/simple/percpu interrupts. This is not + only a functional optimization. It also shortens code paths for + interrupts. + + + To make use of the split implementation, replace the call to + __do_IRQ by a call to desc->handler->handle_irq() and associate + the appropriate handler function to desc->handler->handle_irq(). + In most cases the generic type and handler implementations should + be sufficient. + + + + + Locking on SMP + + The locking of chip registers is up to the architecture that + defines the chip primitives. There is a chip->lock field that can be used + for serialization, but the generic layer does not touch it. The per-irq + structure is protected via desc->lock, by the generic layer. 
+ + + + Structures + + This chapter contains the autogenerated documentation of the structures which are + used in the generic IRQ layer. + +!Iinclude/linux/irq.h + + + + Public Functions Provided + + This chapter contains the autogenerated documentation of the kernel API functions + which are exported. + +!Ekernel/irq/manage.c + + + + Internal Functions Provided + + This chapter contains the autogenerated documentation of the internal functions. + +!Ikernel/irq/handle.c + + + + Credits + + The following people have contributed to this document: + + Thomas Gleixnertglx@linutronix.de + Ingo Molnarmingo@elte.hu + + + +
Index: linux.prev/Documentation/DocBook/kernel-api.tmpl =================================================================== --- linux.prev.orig/Documentation/DocBook/kernel-api.tmpl +++ linux.prev/Documentation/DocBook/kernel-api.tmpl @@ -54,6 +54,11 @@ !Ekernel/sched.c !Ekernel/timer.c + High-resolution timers +!Iinclude/linux/ktime.h +!Iinclude/linux/hrtimer.h +!Ekernel/hrtimer.c + Internal Functions !Ikernel/exit.c !Ikernel/signal.c Index: linux.prev/Documentation/RCU/proc.txt =================================================================== --- /dev/null +++ linux.prev/Documentation/RCU/proc.txt @@ -0,0 +1,207 @@ +/proc Filesystem Entries for RCU + + +CONFIG_RCU_STATS + +The CONFIG_RCU_STATS config option is available only in conjunction with +CONFIG_PREEMPT_RCU. It makes four /proc entries available, namely: rcuctrs, +rcuptrs, rcugp, and rcustats. + +/proc/rcuctrs + + CPU last cur + 0 1 1 + 1 1 1 + 2 1 1 + 3 0 2 + ggp = 230725 + +This displays the number of processes that started RCU read-side critical +sections on each CPU. In absence of preemption, the "last" and "cur" +counts for a given CPU will always sum to one. Therefore, in the example +output above, each CPU has started one RCU read-side critical section +that was later preempted. The "last" column counts RCU read-side critical +sections that started prior to the last counter flip, while the "cur" +column counts critical sections that started after the last counter flip. + +The "ggp" count is a count of the number of counter flips since boot. +Since this is shown as an odd number, the "cur" counts are stored in +the zero-th element of each of the per-CPU arrays, and the "last" counts +are stored in the first element of each of the per-CPU arrays. + + +/proc/rcuptrs + + nl=c04c7160/c04c7960 nt=c04c72d0 + wl=c04c7168/c04c794c wt=c04c72bc dl=c04c7170/00000000 dt=c04c7170 + +This displays the head and tail of each of CONFIG_PREEMPT_RCU's three +callback lists. 
This will soon change to display this on a per-CPU +basis, since each CPU will soon have its own set of callback lists. +In the example above, the "next" list header is located at hex address +0xc04c7160, the first element on the list at hex address 0xc04c7960, +and the last element on the list at hex address 0xc04c72d0. The "wl=" +and "wt=" output is similar for the "wait" list, and the "dl=" and "dt=" +output for the "done" list. The "done" list is normally emptied very +quickly after being filled, so will usually be empty as shown above. +Note that the tail pointer points into the list header in this case. + +Callbacks are placed in the "next" list by call_rcu(), moved to the +"wait" list after the next counter flip, and moved to the "done" list +on the counter flip after that. Once on the "done" list, the callbacks +are invoked. + + +/proc/rcugp + + oldggp=241419 newggp=241421 + +This entry invokes synchronize_rcu() and prints out the number of counter +flips since boot before and after the synchronize_rcu(). These two +numbers will always differ by at least two. Unless RCU is broken. ;-) + + +/proc/rcustats + + ggp=242416 lgp=242416 sr=0 rcc=396233 + na=2090938 nl=9 wa=2090929 wl=9 dl=0 dr=2090920 di=2090920 + rtf1=22230730 rtf2=20139162 rtf3=242416 rtfe1=2085911 rtfe2=5657 rtfe3=19896746 + +The quantities printed are as follows: + +o "ggp=": The number of flips since boot. + +o "lgp=": The number of flips sensed by the local structure since + boot. This will soon be per-CPU. + +o "sr=": The number of explicit calls to synchronize_rcu(). + Except that this is currently broken, so always reads as zero. + It is likely to be removed... + +o "rcc=": The number of calls to rcu_check_callbacks(). + +o "na=": The number of callbacks that call_rcu() has registered + since boot. + +o "nl=": The number of callbacks currently on the "next" list. + +o "wa=": The number of callbacks that have moved to the "wait" + list since boot. 
+ +o "wl=": The number of callbacks currently on the "wait" list. + +o "da=": The number of callbacks that have been moved to the + "done" list since boot. + +o "dl=": The number of callbacks currently on the "done" list. + +o "dr=": The number of callbacks that have been removed from the + "done" list since boot. + +o "di=": The number of callbacks that have been invoked after being + removed from the "done" list. + +o "rtf1=": The number of attempts to flip the counters. + +o "rtf2=": The number of attempts to flip the counters that successfully + acquired the fliplock. + +o "rtf3=": The number of successful counter flips. + +o "rtfe1=": The number of attempts to flip the counters that failed + due to the lock being held by someone else. + +o "rtfe2=": The number of attempts to flip the counters that were + abandoned due to someone else doing the job for us. + +o "rtfe3=": The number of attempts to flip the counters that failed + due to some task still being in an RCU read-side critical section + starting from before the last successful counter flip. + + +CONFIG_RCU_TORTURE_TEST + +The CONFIG_RCU_TORTURE_TEST config option is available for all RCU +implementations. It makes three /proc entries available, namely: rcutw, +rcutr, and rcuts. + + +/proc/rcutw + +Reading this entry starts a new torture test, or ends an earlier one +if one is already in progress (in other words, there can be only one +writer at a time). This sleeps uninterruptibly, so be sure to run +it in the background. One could argue that it would be good to have +multiple writers, but Linux uses RCU heavily enough that you will get +write-side contention whether you want it or not. If you want additional +write-side contention, repeatedly create and destroy several large file +trees in parallel. Or use some other RCU-protected update. + + +/proc/rcutr + +Reading this entry starts a new torture reader, which runs until sent +a signal (e.g., control-C). 
If testing an RCU implementation with +preemptible read-side critical sections, make sure to spawn at least +two /proc/rcutr instances for each CPU. + + +/proc/rcuts + +Displays the current state of the torture test: + + ggp = 20961 + rtc: c04496f4 ver: 8734 tfle: 0 rta: 8734 rtaf: 0 rtf: 8715 + Reader Pipe: 88024120 63914 0 0 0 0 0 0 0 0 0 + Reader Batch: 88024097 63937 0 0 0 0 0 0 0 0 + Free-Block Circulation: 8733 8731 8729 8727 8725 8723 8721 8719 8717 8715 0 + +The entries are as follows: + +o "ggp": The number of counter flips (or batches) since boot. + +o "rtc": The hexadecimal address of the structure currently visible + to readers. + +o "ver": The number of times since boot that the rcutw writer task + has changed the structure visible to readers. + +o "tfle": If non-zero, indicates that the "torture freelist" + containing structure to be placed into the "rtc" area is empty. + This condition is important, since it can fool you into thinking + that RCU is working when it is not. :-/ + +o "rta": Number of structures allocated from the torture freelist. + +o "rtaf": Number of allocations from the torture freelist that have + failed due to the list being empty. + +o "rtf": Number of frees into the torture freelist. + +o "Reader Pipe": Histogram of "ages" of structures seen by readers. + If any entries past the first two are non-zero, RCU is broken. + And /proc/rcuts prints "!!!" to make sure you notice. The age + of a newly allocated structure is zero, it becomes one when + removed from reader visibility, and is incremented once per + grace period subsequently -- and is freed after passing through + (RCU_TORTURE_PIPE_LEN-2) grace periods. + + The output displayed above was taken from a correctly working + RCU. If you want to see what it looks like when broken, break + it yourself. ;-) + +o "Reader Batch": Another histogram of "ages" of structures seen + by readers, but in terms of counter flips (or batches) rather + than in terms of grace periods. 
The legal number of non-zero + entries is again two. The reason for this separate view is + that it is easier to get the third entry to show up in the + "Reader Batch" list than in the "Reader Pipe" list. + +o "Free-Block Circulation": Shows the number of torture structures + that have reached a given point in the pipeline. The first element + should closely correspond to the number of structures allocated, + the second to the number that have been removed from reader view, + and all but the last remaining to the corresponding number of + passes through a grace period. The last entry should be zero, + as it is only incremented if a torture structure's counter + somehow gets incremented farther than it should. Index: linux.prev/Documentation/hrtimers.txt =================================================================== --- /dev/null +++ linux.prev/Documentation/hrtimers.txt @@ -0,0 +1,178 @@ + +hrtimers - subsystem for high-resolution kernel timers +---------------------------------------------------- + +This patch introduces a new subsystem for high-resolution kernel timers. + +One might ask the question: we already have a timer subsystem +(kernel/timers.c), why do we need two timer subsystems? After a lot of +back and forth trying to integrate high-resolution and high-precision +features into the existing timer framework, and after testing various +such high-resolution timer implementations in practice, we came to the +conclusion that the timer wheel code is fundamentally not suitable for +such an approach. We initially didnt believe this ('there must be a way +to solve this'), and spent a considerable effort trying to integrate +things into the timer wheel, but we failed. In hindsight, there are +several reasons why such integration is hard/impossible: + +- the forced handling of low-resolution and high-resolution timers in + the same way leads to a lot of compromises, macro magic and #ifdef + mess. 
The timers.c code is very "tightly coded" around jiffies and + 32-bitness assumptions, and has been honed and micro-optimized for a + relatively narrow use case (jiffies in a relatively narrow HZ range) + for many years - and thus even small extensions to it easily break + the wheel concept, leading to even worse compromises. The timer wheel + code is very good and tight code, there's zero problems with it in its + current usage - but it is simply not suitable to be extended for + high-res timers. + +- the unpredictable [O(N)] overhead of cascading leads to delays which + necessitate a more complex handling of high resolution timers, which + in turn decreases robustness. Such a design still led to rather large + timing inaccuracies. Cascading is a fundamental property of the timer + wheel concept, it cannot be 'designed out' without inevitably + degrading other portions of the timers.c code in an unacceptable way. + +- the implementation of the current posix-timer subsystem on top of + the timer wheel has already introduced a quite complex handling of + the required readjusting of absolute CLOCK_REALTIME timers at + settimeofday or NTP time - further underlining our experience by + example: that the timer wheel data structure is too rigid for high-res + timers. + +- the timer wheel code is most optimal for use cases which can be + identified as "timeouts". Such timeouts are usually set up to cover + error conditions in various I/O paths, such as networking and block + I/O. The vast majority of those timers never expire and are rarely + recascaded because the expected correct event arrives in time so they + can be removed from the timer wheel before any further processing of + them becomes necessary. Thus the users of these timeouts can accept + the granularity and precision tradeoffs of the timer wheel, and + largely expect the timer subsystem to have near-zero overhead. 
+ Accurate timing for them is not a core purpose - in fact most of the + timeout values used are ad-hoc. For them it is at most a necessary + evil to guarantee the processing of actual timeout completions + (because most of the timeouts are deleted before completion), which + should thus be as cheap and unintrusive as possible. + +The primary users of precision timers are user-space applications that +utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel +users like drivers and subsystems which require precise timed events +(e.g. multimedia) can benefit from the availability of a separate +high-resolution timer subsystem as well. + +While this subsystem does not offer high-resolution clock sources just +yet, the hrtimer subsystem can be easily extended with high-resolution +clock capabilities, and patches for that exist and are maturing quickly. +The increasing demand for realtime and multimedia applications along +with other potential users for precise timers gives another reason to +separate the "timeout" and "precise timer" subsystems. + +Another potential benefit is that such a separation allows even more +special-purpose optimization of the existing timer wheel for the low +resolution and low precision use cases - once the precision-sensitive +APIs are separated from the timer wheel and are migrated over to +hrtimers. E.g. we could decrease the frequency of the timeout subsystem +from 250 Hz to 100 Hz (or even smaller). + +hrtimer subsystem implementation details +---------------------------------------- + +the basic design considerations were: + +- simplicity + +- data structure not bound to jiffies or any other granularity. All the + kernel logic works at 64-bit nanoseconds resolution - no compromises. + +- simplification of existing, timing related kernel code + +another basic requirement was the immediate enqueueing and ordering of +timers at activation time. 
After looking at several possible solutions +such as radix trees and hashes, we chose the red black tree as the basic +data structure. Rbtrees are available as a library in the kernel and are +used in various performance-critical areas of e.g. memory management and +file systems. The rbtree is solely used for time sorted ordering, while +a separate list is used to give the expiry code fast access to the +queued timers, without having to walk the rbtree. + +(This separate list is also useful for later when we'll introduce +high-resolution clocks, where we need separate pending and expired +queues while keeping the time-order intact.) + +Time-ordered enqueueing is not purely for the purposes of +high-resolution clocks though, it also simplifies the handling of +absolute timers based on a low-resolution CLOCK_REALTIME. The existing +implementation needed to keep an extra list of all armed absolute +CLOCK_REALTIME timers along with complex locking. In case of +settimeofday and NTP, all the timers (!) had to be dequeued, the +time-changing code had to fix them up one by one, and all of them had to +be enqueued again. The time-ordered enqueueing and the storage of the +expiry time in absolute time units removes all this complex and poorly +scaling code from the posix-timer implementation - the clock can simply +be set without having to touch the rbtree. This also makes the handling +of posix-timers simpler in general. + +The locking and per-CPU behavior of hrtimers was mostly taken from the +existing timer wheel code, as it is mature and well suited. Sharing code +was not really a win, due to the different data structures. Also, the +hrtimer functions now have clearer behavior and clearer names - such as +hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly +equivalent to del_timer() and del_timer_sync()] - so there's no direct +1:1 mapping between them on the algorithmic level, and thus no real +potential for code sharing either. 
+ +Basic data types: every time value, absolute or relative, is in a +special nanosecond-resolution type: ktime_t. The kernel-internal +representation of ktime_t values and operations is implemented via +macros and inline functions, and can be switched between a "hybrid +union" type and a plain "scalar" 64bit nanoseconds representation (at +compile time). The hybrid union type optimizes time conversions on 32bit +CPUs. This build-time-selectable ktime_t storage format was implemented +to avoid the performance impact of 64-bit multiplications and divisions +on 32bit CPUs. Such operations are frequently necessary to convert +between the storage formats provided by kernel and userspace interfaces +and the internal time format. (See include/linux/ktime.h for further +details.) + +hrtimers - rounding of timer values +----------------------------------- + +the hrtimer code will round timer events to lower-resolution clocks +because it has to. Otherwise it will do no artificial rounding at all. + +one question is, what resolution value should be returned to the user by +the clock_getres() interface. This will return whatever real resolution +a given clock has - be it low-res, high-res, or artificially-low-res. + +hrtimers - testing and verification +---------------------------------- + +We used the high-resolution clock subsystem ontop of hrtimers to verify +the hrtimer implementation details in praxis, and we also ran the posix +timer tests in order to ensure specification compliance. We also ran +tests on low-resolution clocks. + +The hrtimer patch converts the following kernel functionality to use +hrtimers: + + - nanosleep + - itimers + - posix-timers + +The conversion of nanosleep and posix-timers enabled the unification of +nanosleep and clock_nanosleep. 
+ +The code was successfully compiled for the following platforms: + + i386, x86_64, ARM, PPC, PPC64, IA64 + +The code was run-tested on the following platforms: + + i386(UP/SMP), x86_64(UP/SMP), ARM, PPC + +hrtimers were also integrated into the -rt tree, along with a +hrtimers-based high-resolution clock implementation, so the hrtimers +code got a healthy amount of testing and use in practice. + + Thomas Gleixner, Ingo Molnar Index: linux.prev/Documentation/kernel-parameters.txt =================================================================== --- linux.prev.orig/Documentation/kernel-parameters.txt +++ linux.prev/Documentation/kernel-parameters.txt @@ -52,6 +52,7 @@ restrictions referred to are that the re MTD MTD support is enabled. NET Appropriate network support is enabled. NUMA NUMA support is enabled. + GENERIC_TIME The generic timeofday code is enabled. NFS Appropriate NFS support is enabled. OSS OSS sound support is enabled. PARIDE The ParIDE subsystem is enabled. @@ -329,10 +330,11 @@ running once the system is up. Value can be changed at runtime via /selinux/checkreqprot. - clock= [BUGS=IA-32,HW] gettimeofday timesource override. - Forces specified timesource (if avaliable) to be used - when calculating gettimeofday(). If specicified - timesource is not avalible, it defaults to PIT. + clock= [BUGS=IA-32, HW] gettimeofday clocksource override. + [Deprecated] + Forces specified clocksource (if avaliable) to be used + when calculating gettimeofday(). If specified + clocksource is not avalible, it defaults to PIT. Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. @@ -1477,6 +1479,10 @@ running once the system is up. time Show timing data prefixed to each printk message line + clocksource= [GENERIC_TIME] Override the default clocksource + Override the default clocksource and use the clocksource + with the name specified. + tipar.timeout= [HW,PPT] Set communications timeout in tenths of a second (default 15). 
Index: linux.prev/Documentation/timekeeping.txt =================================================================== --- /dev/null +++ linux.prev/Documentation/timekeeping.txt @@ -0,0 +1,350 @@ +How timekeeping works with CONFIG_GENERIC_TIME +======================================================================== + +The generic timekeeping code maintains and allows access to the system's +understanding of how much time has passed from a certain point. However, in +order to measure the passing of time, the generic timekeeping code relies on +the clocksource abstraction. A clocksource abstracts a free running counter +whose value increases at a known frequency. + +In the generic timekeeping code, we use a pointer to a selected clocksource to +measure the passing of time. + +struct clocksource *clock + +The clocksource has some limitations however. Since it's likely of fixed width, +it will not increment forever and will overflow. In order to still properly +keep time, we must occasionally accumulate an interval of time. In the generic +timekeeping code, we accumulate the amount of time since the system booted +into the value system_time, which keeps nanosecond resolution in a ktime_t +storage. + +ktime_t system_time + +Since it's likely your system has not been running continually since midnight on +the 1st of January in 1970, we must provide an offset from that time in +accordance with conventions. This only occasionally changed (via +settimeofday()) offset is the wall_time_offset value, which is also stored as a +ktime_t. + +ktime_t wall_time_offset + + +Since we accumulate time in intervals, we need a base cycle value that we can +use to generate an offset from the time value kept in system_time. We store +this value in cycle_last. + +cycle_t cycle_last; + + +Further since all clocks drift somewhat from each other, we use the adjustment +values provided via adjtimex() to correct our clocksource frequency for each +interval.
This frequency adjustment value is stored in ntp_adj. + +long ntp_adj; + +Now that we've covered the core global variables for timekeeping, let's look at +how we maintain these values. + +As stated above, we want to keep the clocksource from overflowing on us, so we +accumulate a time interval periodically. This periodic accumulation function is +called timeofday_periodic_hook(). In simplified pseudo code, it logically is +presented as: + +timeofday_periodic_hook(): + cycle_now = read_clocksource(clock) + cycle_delta = (cycle_now - cycle_last) & clock->mask + nsec = cyc2ns(clock, cycle_delta, ntp_adj) + system_time += nsec + cycle_last = cycle_now + + /* do other stuff */ + +You can see we read the cycle value from the clocksource, calculate a cycle +delta for the interval since we last called timeofday_periodic_hook(), convert +that cycle delta to a nanosecond interval (for now ignore ntp_adj), add it to +the system time and finally set our cycle_last value to cycle_now for the next +interval. Using this simple algorithm we can correctly measure and record the +passing of time. + +But just storing this info isn't very useful, we also want to make it available +to be used elsewhere. So how do we provide a notion of how much time has passed +in between calls to timeofday_periodic_hook()? + +First, let's create a function that calculates the time since the last call to +timeofday_periodic_hook(). + +get_nsec_offset(): + cycle_now = read_clocksource(clock) + cycle_delta = (cycle_now - cycle_last) & clock->mask + nsec = cyc2ns(clock, cycle_delta, ntp_adj) + return nsec + +Here you can see, we read the clocksource, calculate a cycle interval, and +convert that to a nanosecond interval. Just like how it is done in +timeofday_periodic_hook!
+ +Now let's use this function to provide the number of nanoseconds that the system +has been running: + +do_monotonic_clock(): + return system_time + get_nsec_offset() + +Here we trivially add the nanosecond offset since the last +timeofday_periodic_hook() to the value of system_time which was stored at the +last timeofday_periodic_hook(). + +Note that since we use the same method to calculate time intervals, assuming +each function is atomic and the clocksource functions as it should, time cannot +go backward! + +Now to get the time of day using the standard convention: + +do_gettimeofday(): + return do_monotonic_clock() + wall_time_offset + +We simply add the wall_time_offset, and we have the number of nanoseconds since +1970 began! + + +Of course, in real life, things are not so static. We have to handle a number +of dynamic values that may change and affect timekeeping. In order to do these +safely, we must only change values in-between intervals. This means the +periodic_hook call must handle these changes. + +Since clocksources can be changed while the system is running, we need to check +for and possibly switch to using new clocksources in the periodic_hook call. +Further, clocksources may change their frequency. Since this must be done only +at a safe point, we use the update_callback function pointer (for more details, +see "How to write a clocksource driver" below), this too must be done +in-between intervals in the periodic_hook call. Finally, since the ntp +adjustment made in the cyc2ns conversion is not static, we need to update the +ntp state machine and calculate a new adjustment value.
+ +This adds some extra pseudo code to the timeofday_periodic_hook function: + +timeofday_periodic_hook(): + cycle_now = read_clocksource(clock) + cycle_delta = (cycle_now - cycle_last) & clock->mask + nsec = cyc2ns(clock, cycle_delta, ntp_adj) + system_time += nsec + cycle_last = cycle_now + + next = get_next_clocksource() + if(next != clock): + cycle_last = read_clocksource(next) + clock = next + + if(clock->update_callback): + clock->update_callback() + + ntp_advance(nsec) + ppm = ntp_get_ppm_adjustment() + ntp_adj = ppm_to_mult_adj(clock, ppm) + + +Unfortunately, the actual timeofday_periodic_hook code is not as simple as this +pseudo code. For performance reasons, much has been done to pre-calculate +values and use them repeatedly. Thus be aware that the code in timeofday.c is +more complex, however the functional logic is the same. + + +How to port an architecture to GENERIC_TIME +======================================================================== +Porting an architecture to the GENERIC_TIME timekeeping code consists of moving +a little bit of code around then deleting a fair amount. It is my hope that +this will reduce the arch specific maintenance work around timekeeping. + +Porting an arch usually requires the following steps. + +1. Define CONFIG_GENERIC_TIME in the arch's Kconfig +2. Implement the following functions + nsec_t read_persistent_clock(void) + void sync_persistent_clock(struct timespec ts) +3. Remove all of the arch specific timekeeping code + do_gettimeofday() + do_settimeofday() + etc +4. Implement clocksource drivers + See "How to write a clocksource driver" for more details + +The exceptions to the above are: + +5. If the arch has no continuous clocksource + A) Implement 1-3 in the above list. + B) Define CONFIG_IS_TICK_BASED in the arch's Kconfig + C) Implement the "long arch_getoffset(void)" function + +6.
If the arch supports vsyscall gettimeofday (see x86_64 for reference) + A) Implement 1-4 in the above list + B) Define GENERIC_TIME_VSYSCALL + C) Implement arch_update_vsyscall_gtod() + D) Implement vsyscall gettimeofday (similar to __get_realtime_clock_ts) + E) Implement vread functions for supported clocksources + + + +How to write a clocksource driver. +======================================================================== +First, a quick summary of what a clocksource driver provides. + +Simply put, a clocksource is an abstraction of a free running increasing +counter. The abstraction provides the minimal amount of info for that counter +to be usable for timekeeping. Those required values are: + 1. Its name + 2. A rating value for selection priority + 3. A read function pointer + 4. A mask value for correct twos-complement subtraction + 5. A mult and shift pair that approximate the counter frequency + mult/(2^shift) ~= nanoseconds per cycle + +Additionally, there are other optionally set values that allow for advanced +functionality. Those values are: + 6. The update_callback function. + 7. The is_continuous flag. + 8. The vread function pointer + 9. The vdata pointer value + + +Now let's go over these values in detail. + +1. Name. + The clocksource's name should be unique since it is used for both +identification as well as for manually overriding the default clocksource +selection. The name length must be shorter than 32 characters in order for it +to be properly overridden. + +2. Rating value + This rating value is used as a priority value for clocksource +selection. It has no direct connection to quality or physical properties of the +clocksource, but is to be set and manipulated to guarantee that the best (by no +specific metric) clocksource that will provide correct timekeeping is +automatically selected. Rating suggestions can be found in +include/linux/clocksource.h + +3.
Read function pointer + This pointer should point to a function that returns an unsigned +increasing cycle value from the clocksource. The value should cover the range +from zero to the maximum cycle value the clocksource can provide. This does not +have to be a direct hardware value and can also be a software counter. An example +of a software counter is the jiffies clocksource. + +4. The mask value + This value should be one less than a power of two and equal to the +maximum cycle value. This allows twos complement subtraction to work on +overflow boundary conditions if the max value is less than (cycle_t)-1. So for +example, if we have a 16 bit counter (ie: one that loops to zero after +0x0000FFFF), the mask would be 0xFFFF. So then when finding the cycle +difference around an overflow, where now = 0x0013 and then = 0xFFEE, we can +compute the cycle delta properly using the equation: + delta = (now - then)&mask + delta = (0x0013 - 0xFFEE) & 0xFFFF + delta = 0xFFFF0025 & 0xFFFF /* note the unmasked negative value */ + delta = 0x25 + +5. The mult and shift pair + These 32bit values approximate the nanosecond per cycle frequency of +the clocksource using the equation: mult/(2^shift). If you have a khz or hz +frequency value, the mult value for a given shift value can be easily +calculated using the clocksource_hz2mult() and clocksource_khz2mult() helper +functions. When selecting a shift value, it is important to be careful. Larger +shift values give a finer precision in the cycle to nanosecond conversion and +allow for more exact NTP adjustments. However if you select too large a shift +value, the resulting mult value might overflow a cycle_t * mult computation. + + +So if you have a simple hardware counter that does not change frequency, +filling in the above should be sufficient for a functional clocksource. But +read on for details on implementing a more complex clocksource. + +6. The update_callback function pointer.
+ If this function pointer is non-NULL, it will be called every periodic +hook when it is safe for the clocksource to change its state. This would be +necessary in the case where the counter frequency changes, for example. One +user of this function pointer is the TSC clocksource. When the TSC frequency +changes (which may occur if the cpu changes frequency) we need to notify the +clocksource at a safe point where that state may change. Thus, if the TSC has +changed frequency we set the new mult/shift values in the update_callback +function. + +7. The is_continuous flag. + This flag variable (0 if false, 1 if true) denotes that the clocksource +is continuous. This means that it is a purely hardware driven clocksource and +is not dependent on any software code to run for it to increment properly. This +denotation will be useful in the future when timer ticks may be disabled for +long periods of time. Doing so using software clocksources, like the jiffies +clocksource, would cause timekeeping problems. + +8. The vread function pointer. + This function pointer points to a user-space accessible function that +reads the clocksource. This is used in userspace gettimeofday implementations +to improve performance. See the x86-64 TSC clocksource implementation for an +example. + +9. The vdata pointer. + This pointer is passed to the vread function pointer in a userspace +gettimeofday implementation. Its usage is dependent on the vread +implementation, but if the pointer points to data, that data must be readable +from userspace. + + +Now let's write a quick clocksource for an imaginary bit of hardware. Here are +the specs: + + A 32bit counter can be found at the MMIO address 0xFEEDF000. It runs at +100MHz. To enable it, the low bit of the address 0xFEEDF0F0 must be set to +one. + +So let's start out with an empty cool-counter.c file, and define the clocksource.
+ +#include +#include +#include + +#define COOL_READ_PTR 0xFEEDF000 +#define COOL_START_PTR 0xFEEDF0F0 + +static __iomem *cool_ptr = COOL_READ_PTR; + +struct clocksource clocksource_cool = +{ + .name = "cool", + .rating = 200, /* it's a pretty decent clock */ + .mask = 0xFFFFFFFF, /* 32 bits */ + .mult = 0, /* to be computed */ + .shift = 10, +}; + + +Now let's write the read function: + +cycle_t cool_counter_read(void) +{ + cycle_t ret = readl(cool_ptr); + return ret; +} + +Finally, let's write the init function: + +void cool_counter_init(void) +{ + __iomem *ptr = COOL_START_PTR; + u32 val; + + /* start the counter */ + val = readl(ptr); + val |= 0x1; + writel(val, ptr); + + /* finish initializing the clocksource */ + clocksource_cool.read = cool_counter_read; + clocksource_cool.mult = clocksource_khz2mult(100000, + clocksource_cool.shift); + + /* register the clocksource */ + register_clocksource(&clocksource_cool); +} +module_init(cool_counter_init); + + +Now wasn't that easy! Index: linux.prev/Makefile =================================================================== --- linux.prev.orig/Makefile +++ linux.prev/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 15 -EXTRAVERSION = +EXTRAVERSION =-rt21 NAME=Sliding Snow Leopard # *DOCUMENTATION* @@ -519,10 +519,14 @@ CFLAGS += $(call add-align,CONFIG_CC_AL CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops) CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps) -ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) +ifdef CONFIG_MCOUNT +CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) else -CFLAGS += -fomit-frame-pointer + ifdef CONFIG_FRAME_POINTER + CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) + else + CFLAGS += -fomit-frame-pointer + endif endif ifdef CONFIG_DEBUG_INFO Index: linux.prev/arch/arm/Kconfig ===================================================================
--- linux.prev.orig/arch/arm/Kconfig +++ linux.prev/arch/arm/Kconfig @@ -50,6 +50,10 @@ config UID16 bool default y +config GENERIC_HARDIRQS + bool + default y + config RWSEM_GENERIC_SPINLOCK bool default y @@ -368,18 +372,7 @@ config LOCAL_TIMERS accounting to be spread across the timer interval, preventing a "thundering herd" at every timer tick. -config PREEMPT - bool "Preemptible Kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. +source kernel/Kconfig.preempt config NO_IDLE_HZ bool "Dynamic tick timer" Index: linux.prev/arch/arm/boot/compressed/head.S =================================================================== --- linux.prev.orig/arch/arm/boot/compressed/head.S +++ linux.prev/arch/arm/boot/compressed/head.S @@ -710,6 +710,19 @@ memdump: mov r12, r0 mov pc, r10 #endif +#ifdef CONFIG_MCOUNT +/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this + * trampoline + */ + .text + .align 0 + .type mcount %function + .global mcount +mcount: + mov pc, lr @ just return +#endif + + reloc_end: .align Index: linux.prev/arch/arm/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/arm/boot/compressed/misc.c +++ linux.prev/arch/arm/boot/compressed/misc.c @@ -199,6 +199,7 @@ static ulg free_mem_ptr_end; #define HEAP_SIZE 0x2000 +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" #ifndef STANDALONE_DEBUG Index: linux.prev/arch/arm/common/dmabounce.c =================================================================== --- linux.prev.orig/arch/arm/common/dmabounce.c 
+++ linux.prev/arch/arm/common/dmabounce.c @@ -404,11 +404,11 @@ dma_map_single(struct device *dev, void BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); dma_addr = map_single(dev, ptr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); return dma_addr; } @@ -431,11 +431,11 @@ dma_unmap_single(struct device *dev, dma BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); unmap_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int @@ -450,7 +450,7 @@ dma_map_sg(struct device *dev, struct sc BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { struct page *page = sg->page; @@ -462,7 +462,7 @@ dma_map_sg(struct device *dev, struct sc map_single(dev, ptr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); return nents; } @@ -479,7 +479,7 @@ dma_unmap_sg(struct device *dev, struct BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -488,7 +488,7 @@ dma_unmap_sg(struct device *dev, struct unmap_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -500,11 +500,11 @@ dma_sync_single_for_cpu(struct device *d dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", __func__, (void *) dma_addr, size, dir); - local_irq_save(flags); + raw_local_irq_save(flags); sync_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -516,11 +516,11 @@ dma_sync_single_for_device(struct device dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", __func__, (void *) dma_addr, size, dir); - local_irq_save(flags); + raw_local_irq_save(flags); sync_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -535,7 +535,7 @@ dma_sync_sg_for_cpu(struct device *dev, 
BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -544,7 +544,7 @@ dma_sync_sg_for_cpu(struct device *dev, sync_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -559,7 +559,7 @@ dma_sync_sg_for_device(struct device *de BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -568,7 +568,7 @@ dma_sync_sg_for_device(struct device *de sync_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int Index: linux.prev/arch/arm/common/locomo.c =================================================================== --- linux.prev.orig/arch/arm/common/locomo.c +++ linux.prev/arch/arm/common/locomo.c @@ -425,6 +425,12 @@ static struct irqchip locomo_spi_chip = .unmask = locomo_spi_unmask_irq, }; +static DEFINE_IRQ_CHAINED_TYPE(locomo_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_key_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_gpio_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_lt_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_spi_handler); + static void locomo_setup_irq(struct locomo *lchip) { int irq; Index: linux.prev/arch/arm/common/sa1111.c =================================================================== --- linux.prev.orig/arch/arm/common/sa1111.c +++ linux.prev/arch/arm/common/sa1111.c @@ -171,11 +171,11 @@ sa1111_irq_handler(unsigned int irq, str for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1) if (stat0 & 1) - do_edge_IRQ(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i, regs); for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1) if (stat1 & 1) - do_edge_IRQ(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i, regs); /* For level-based interrupts */ desc->chip->unmask(irq); @@ -380,6 +380,8 @@ static struct irqchip 
sa1111_high_chip = .set_wake = sa1111_wake_highirq, }; +static DEFINE_IRQ_CHAINED_TYPE(sa1111_irq_handler); + static void sa1111_setup_irq(struct sa1111 *sachip) { void __iomem *irqbase = sachip->base + SA1111_INTC; Index: linux.prev/arch/arm/common/time-acorn.c =================================================================== --- linux.prev.orig/arch/arm/common/time-acorn.c +++ linux.prev/arch/arm/common/time-acorn.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_i static struct irqaction ioc_timer_irq = { .name = "timer", - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .handler = ioc_timer_interrupt }; Index: linux.prev/arch/arm/kernel/calls.S =================================================================== --- linux.prev.orig/arch/arm/kernel/calls.S +++ linux.prev/arch/arm/kernel/calls.S @@ -7,11 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
* - * This file is included twice in entry-common.S + * NR_syscalls now defined in include/asm-arm/unistd.h - tglx */ -#ifndef NR_syscalls -#define NR_syscalls 328 -#else __syscall_start: /* 0 */ .long sys_restart_syscall @@ -341,4 +338,3 @@ __syscall_end: .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 .long sys_ni_syscall .endr -#endif Index: linux.prev/arch/arm/kernel/dma.c =================================================================== --- linux.prev.orig/arch/arm/kernel/dma.c +++ linux.prev/arch/arm/kernel/dma.c @@ -22,7 +22,7 @@ #include -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_RAW_SPINLOCK(dma_spin_lock); #if MAX_DMA_CHANNELS > 0 Index: linux.prev/arch/arm/kernel/ecard.c =================================================================== --- linux.prev.orig/arch/arm/kernel/ecard.c +++ linux.prev/arch/arm/kernel/ecard.c @@ -619,7 +619,7 @@ ecard_irqexp_handler(unsigned int irq, s ecard_t *ec = slot_to_ecard(slot); if (ec->claimed) { - struct irqdesc *d = irqdesc + ec->irq; + struct irqdesc *d = irq_desc + ec->irq; /* * this ugly code is so that we can operate a * prioritorising system: @@ -1052,6 +1052,9 @@ ecard_probe(int slot, card_type_t type) return rc; } +static DEFINE_IRQ_CHAINED_TYPE(ecard_irqexp_handler); +static DEFINE_IRQ_CHAINED_TYPE(ecard_irq_handler); + /* * Initialise the expansion card system. * Locate all hardware - interrupt management and @@ -1081,8 +1084,10 @@ static int __init ecard_init(void) irqhw = ecard_probeirqhw(); - set_irq_chained_handler(IRQ_EXPANSIONCARD, - irqhw ? 
ecard_irqexp_handler : ecard_irq_handler); + if (irqhw) + set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irqexp_handler); + else + set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irq_handler); ecard_proc_init(); Index: linux.prev/arch/arm/kernel/entry-armv.S =================================================================== --- linux.prev.orig/arch/arm/kernel/entry-armv.S +++ linux.prev/arch/arm/kernel/entry-armv.S @@ -192,7 +192,7 @@ __irq_svc: irq_handler #ifdef CONFIG_PREEMPT ldr r0, [tsk, #TI_FLAGS] @ get flags - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED blne svc_preempt preempt_return: ldr r0, [tsk, #TI_PREEMPT] @ read preempt value @@ -219,7 +219,7 @@ svc_preempt: str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED beq preempt_return @ go again b 1b #endif Index: linux.prev/arch/arm/kernel/entry-common.S =================================================================== --- linux.prev.orig/arch/arm/kernel/entry-common.S +++ linux.prev/arch/arm/kernel/entry-common.S @@ -3,6 +3,8 @@ * * Copyright (C) 2000 Russell King * + * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -41,7 +43,7 @@ ret_fast_syscall: fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: - tst r1, #_TIF_NEED_RESCHED + tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED bne work_resched tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING beq no_work_pending @@ -51,7 +53,8 @@ work_pending: b ret_slow_syscall @ Check work again work_resched: - bl schedule + bl __schedule + /* * "slow" syscall return path. 
"why" tells us if this was a real syscall. */ @@ -87,8 +90,6 @@ ENTRY(ret_from_fork) b ret_slow_syscall -#include "calls.S" - /*============================================================================= * SWI handler *----------------------------------------------------------------------------- @@ -271,3 +272,110 @@ sys_mmap2: str r5, [sp, #4] b do_mmap2 #endif + +#ifdef CONFIG_FRAME_POINTER + +#ifdef CONFIG_MCOUNT +/* + * At the point where we are in mcount() we maintain the + * frame of the prologue code and keep the call to mcount() + * out of the stack frame list: + + saved pc <---\ caller of instrumented routine + saved lr | + ip/prev_sp | + fp -----^ | + : | + | + -> saved pc | instrumented routine + | saved lr | + | ip/prev_sp | + | fp ---------/ + | : + | + | mcount + | saved pc + | saved lr + | ip/prev sp + -- fp + r3 + r2 + r1 + sp-> r0 + : + */ + + .text + .align 0 + .type mcount %function + .global mcount + +/* gcc -pg generated FUNCTION_PROLOGUE references mcount() + * and has already created the stack frame invocation for + * the routine we have been called to instrument. We create + * a complete frame nevertheless, as we want to use the same + * call to mcount() from c code. + */ +mcount: + + ldr ip, =mcount_enabled @ leave early, if disabled + ldr ip, [ip] + cmp ip, #0 + moveq pc,lr + + mov ip, sp + stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame + + ldr r1, [fp, #-4] @ get lr (the return address + @ of the caller of the + @ instrumented function) + mov r0, lr @ get lr - (the return address + @ of the instrumented function) + + sub fp, ip, #4 @ point fp at this frame + + bl __trace +1: + ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return + +#endif + +/* ARM replacement for unsupported gcc __builtin_return_address(n) + * where 0 < n. n == 0 is supported here as well. + * + * Walk up the stack frame until the desired frame is found or a NULL + * fp is encountered, return NULL in the latter case. 
+ * + * Note: it is possible under code optimization for the stack invocation + * of an ancestor function (level N) to be removed before calling a + * descendant function (level N+1). No easy means is available to deduce + * this scenario with the result being [for example] caller_addr(0) when + * called from level N+1 returning level N-1 rather than the expected + * level N. This optimization issue appears isolated to the case of + * a call to a level N+1 routine made at the tail end of a level N + * routine -- the level N frame is deleted and a simple branch is made + * to the level N+1 routine. + */ + + .text + .align 0 + .type arm_return_addr %function + .global arm_return_addr + +arm_return_addr: + mov ip, r0 + mov r0, fp +3: + cmp r0, #0 + beq 1f @ frame list hit end, bail + cmp ip, #0 + beq 2f @ reached desired frame + ldr r0, [r0, #-12] @ else continue, get next fp + sub ip, ip, #1 + b 3b +2: + ldr r0, [r0, #-4] @ get target return address +1: + mov pc, lr + +#endif Index: linux.prev/arch/arm/kernel/fiq.c =================================================================== --- linux.prev.orig/arch/arm/kernel/fiq.c +++ linux.prev/arch/arm/kernel/fiq.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,7 @@ void set_fiq_handler(void *start, unsign * disable irqs for the duration. Note - these functions are almost * entirely coded in assembly. 
*/ -void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( @@ -106,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs : "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); } -void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( Index: linux.prev/arch/arm/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/arm/kernel/init_task.c +++ linux.prev/arch/arm/kernel/init_task.c @@ -12,8 +12,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/arm/kernel/irq.c =================================================================== --- linux.prev.orig/arch/arm/kernel/irq.c +++ linux.prev/arch/arm/kernel/irq.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -38,193 +39,11 @@ #include #include -#include #include -#include #include -/* - * Maximum IRQ count. Currently, this is arbitary. However, it should - * not be set too low to prevent false triggering. Conversely, if it - * is set too high, then you could miss a stuck IRQ. - * - * Maybe we ought to set a timer and re-enable the IRQ at a later time?
- */ -#define MAX_IRQ_CNT 100000 - -static int noirqdebug; -static volatile unsigned long irq_err_count; -static DEFINE_SPINLOCK(irq_controller_lock); -static LIST_HEAD(irq_pending); - -struct irqdesc irq_desc[NR_IRQS]; void (*init_arch_irq)(void) __initdata = NULL; -/* - * No architecture-specific irq_finish function defined in arm/arch/irqs.h. - */ -#ifndef irq_finish -#define irq_finish(irq) do { } while (0) -#endif - -/* - * Dummy mask/unmask handler - */ -void dummy_mask_unmask_irq(unsigned int irq) -{ -} - -irqreturn_t no_action(int irq, void *dev_id, struct pt_regs *regs) -{ - return IRQ_NONE; -} - -void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - irq_err_count += 1; - printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq); -} - -static struct irqchip bad_chip = { - .ack = dummy_mask_unmask_irq, - .mask = dummy_mask_unmask_irq, - .unmask = dummy_mask_unmask_irq, -}; - -static struct irqdesc bad_irq_desc = { - .chip = &bad_chip, - .handle = do_bad_IRQ, - .pend = LIST_HEAD_INIT(bad_irq_desc.pend), - .disable_depth = 1, -}; - -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - - while (desc->running) - barrier(); -} -EXPORT_SYMBOL(synchronize_irq); - -#define smp_set_running(desc) do { desc->running = 1; } while (0) -#define smp_clear_running(desc) do { desc->running = 0; } while (0) -#else -#define smp_set_running(desc) do { } while (0) -#define smp_clear_running(desc) do { } while (0) -#endif - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and disables - * are nested. We do this lazily. - * - * This function may be called from IRQ context. 
- */ -void disable_irq_nosync(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - desc->disable_depth++; - list_del_init(&desc->pend); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(disable_irq_nosync); - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and disables - * are nested. This functions waits for any pending IRQ - * handlers for this interrupt to complete before returning. - * If you use this function while holding a resource the IRQ - * handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ -void disable_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - - disable_irq_nosync(irq); - if (desc->action) - synchronize_irq(irq); -} -EXPORT_SYMBOL(disable_irq); - -/** - * enable_irq - enable interrupt handling on an irq - * @irq: Interrupt to enable - * - * Re-enables the processing of interrupts on this IRQ line. - * Note that this may call the interrupt handler, so you may - * get unexpected results if you hold IRQs disabled. - * - * This function may be called from IRQ context. - */ -void enable_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (unlikely(!desc->disable_depth)) { - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } else if (!--desc->disable_depth) { - desc->probing = 0; - desc->chip->unmask(irq); - - /* - * If the interrupt is waiting to be processed, - * try to re-run it. We can't directly run it - * from here since the caller might be in an - * interrupt-protected region. 
- */ - if (desc->pending && list_empty(&desc->pend)) { - desc->pending = 0; - if (!desc->chip->retrigger || - desc->chip->retrigger(irq)) - list_add(&desc->pend, &irq_pending); - } - } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(enable_irq); - -/* - * Enable wake on selected irq - */ -void enable_irq_wake(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (desc->chip->set_wake) - desc->chip->set_wake(irq, 1); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(enable_irq_wake); - -void disable_irq_wake(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (desc->chip->set_wake) - desc->chip->set_wake(irq, 0); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(disable_irq_wake); - int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, cpu; @@ -243,7 +62,7 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_controller_lock, flags); + spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -257,7 +76,7 @@ int show_interrupts(struct seq_file *p, seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_controller_lock, flags); + spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { #ifdef CONFIG_ARCH_ACORN show_fiq_list(p, v); @@ -266,374 +85,83 @@ unlock: show_ipi_list(p); show_local_irqs(p); #endif +#ifdef FIXME_TGLX seq_printf(p, "Err: %10lu\n", irq_err_count); - } - return 0; -} - -/* - * IRQ lock detection. - * - * Hopefully, this should get us out of a few locked situations. - * However, it may take a while for this to happen, since we need - * a large number if IRQs to appear in the same jiffie with the - * same instruction pointer (or within 2 instructions). 
- */ -static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs) -{ - unsigned long instr_ptr = instruction_pointer(regs); - - if (desc->lck_jif == jiffies && - desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) { - desc->lck_cnt += 1; - - if (desc->lck_cnt > MAX_IRQ_CNT) { - printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq); - return 1; - } - } else { - desc->lck_cnt = 0; - desc->lck_pc = instruction_pointer(regs); - desc->lck_jif = jiffies; - } - return 0; -} - -static void -report_bad_irq(unsigned int irq, struct pt_regs *regs, struct irqdesc *desc, int ret) -{ - static int count = 100; - struct irqaction *action; - - if (!count || noirqdebug) - return; - - count--; - - if (ret != IRQ_HANDLED && ret != IRQ_NONE) { - printk("irq%u: bogus retval mask %x\n", irq, ret); - } else { - printk("irq%u: nobody cared\n", irq); - } - show_regs(regs); - dump_stack(); - printk(KERN_ERR "handlers:"); - action = desc->action; - do { - printk("\n" KERN_ERR "[<%p>]", action->handler); - print_symbol(" (%s)", (unsigned long)action->handler); - action = action->next; - } while (action); - printk("\n"); -} - -static int -__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs) -{ - unsigned int status; - int ret, retval = 0; - - spin_unlock(&irq_controller_lock); - -#ifdef CONFIG_NO_IDLE_HZ - if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) { - write_seqlock(&xtime_lock); - if (system_timer->dyn_tick->state & DYN_TICK_ENABLED) - system_timer->dyn_tick->handler(irq, 0, regs); - write_sequnlock(&xtime_lock); - } #endif - - if (!(action->flags & SA_INTERRUPT)) - local_irq_enable(); - - status = 0; - do { - ret = action->handler(irq, action->dev_id, regs); - if (ret == IRQ_HANDLED) - status |= action->flags; - retval |= ret; - action = action->next; - } while (action); - - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - - spin_lock_irq(&irq_controller_lock); - - return retval; -} - 
-/* - * This is for software-decoded IRQs. The caller is expected to - * handle the ack, clear, mask and unmask issues. - */ -void -do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - struct irqaction *action; - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - kstat_cpu(cpu).irqs[irq]++; - - smp_set_running(desc); - - action = desc->action; - if (action) { - int ret = __do_irq(irq, action, regs); - if (ret != IRQ_HANDLED) - report_bad_irq(irq, regs, desc, ret); - } - - smp_clear_running(desc); -} - -/* - * Most edge-triggered IRQ implementations seem to take a broken - * approach to this. Hence the complexity. - */ -void -do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - /* - * If we're currently running this IRQ, or its disabled, - * we shouldn't process the IRQ. Instead, turn on the - * hardware masks. - */ - if (unlikely(desc->running || desc->disable_depth)) - goto running; - - /* - * Acknowledge and clear the IRQ, but don't mask it. - */ - desc->chip->ack(irq); - - /* - * Mark the IRQ currently in progress. - */ - desc->running = 1; - - kstat_cpu(cpu).irqs[irq]++; - - do { - struct irqaction *action; - - action = desc->action; - if (!action) - break; - - if (desc->pending && !desc->disable_depth) { - desc->pending = 0; - desc->chip->unmask(irq); - } - - __do_irq(irq, action, regs); - } while (desc->pending && !desc->disable_depth); - - desc->running = 0; - - /* - * If we were disabled or freed, shut down the handler. - */ - if (likely(desc->action && !check_irq_lock(desc, irq, regs))) - return; - - running: - /* - * We got another IRQ while this one was masked or - * currently running. Delay it. - */ - desc->pending = 1; - desc->chip->mask(irq); - desc->chip->ack(irq); -} - -/* - * Level-based IRQ handler. Nice and simple. 
- */ -void -do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - struct irqaction *action; - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - /* - * Acknowledge, clear _AND_ disable the interrupt. - */ - desc->chip->ack(irq); - - if (likely(!desc->disable_depth)) { - kstat_cpu(cpu).irqs[irq]++; - - smp_set_running(desc); - - /* - * Return with this interrupt masked if no action - */ - action = desc->action; - if (action) { - int ret = __do_irq(irq, desc->action, regs); - - if (ret != IRQ_HANDLED) - report_bad_irq(irq, regs, desc, ret); - - if (likely(!desc->disable_depth && - !check_irq_lock(desc, irq, regs))) - desc->chip->unmask(irq); - } - - smp_clear_running(desc); } + return 0; } -static void do_pending_irqs(struct pt_regs *regs) -{ - struct list_head head, *l, *n; - - do { - struct irqdesc *desc; - - /* - * First, take the pending interrupts off the list. - * The act of calling the handlers may add some IRQs - * back onto the list. - */ - head = irq_pending; - INIT_LIST_HEAD(&irq_pending); - head.next->prev = &head; - head.prev->next = &head; - - /* - * Now run each entry. We must delete it from our - * list before calling the handler. - */ - list_for_each_safe(l, n, &head) { - desc = list_entry(l, struct irqdesc, pend); - list_del_init(&desc->pend); - desc_handle_irq(desc - irq_desc, desc, regs); - } - - /* - * The list must be empty. - */ - BUG_ON(!list_empty(&head)); - } while (!list_empty(&irq_pending)); -} +/* Handle bad interrupts */ +static struct irq_desc bad_irq = { + .handler = &no_irq_type, + .lock = RAW_SPIN_LOCK_UNLOCKED +}; /* - * do_IRQ handles all hardware IRQ's. Decoded IRQs should not + * asm_do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. 
Instead, they should provide their * own 'handler' */ -asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct irqdesc *desc = irq_desc + irq; + trace_special(instruction_pointer(regs), irq, 0); + /* * Some hardware gives randomly wrong interrupts. Rather * than crashing, do something sensible. */ if (irq >= NR_IRQS) - desc = &bad_irq_desc; + desc = &bad_irq; irq_enter(); - spin_lock(&irq_controller_lock); - desc_handle_irq(irq, desc, regs); - /* - * Now re-run any pending interrupts. - */ - if (!list_empty(&irq_pending)) - do_pending_irqs(regs); - - irq_finish(irq); + desc_handle_irq(irq, desc, regs); - spin_unlock(&irq_controller_lock); irq_exit(); } -void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained) +void __set_irq_handler(unsigned int irq, struct irq_type *type, int is_chained) { struct irqdesc *desc; unsigned long flags; if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq); + printk(KERN_ERR "Trying to install type control for IRQ%d\n", irq); return; } - if (handle == NULL) - handle = do_bad_IRQ; - desc = irq_desc + irq; - if (is_chained && desc->chip == &bad_chip) - printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq); - - spin_lock_irqsave(&irq_controller_lock, flags); - if (handle == do_bad_IRQ) { - desc->chip->mask(irq); - desc->chip->ack(irq); - desc->disable_depth = 1; - } - desc->handle = handle; - if (handle != do_bad_IRQ && is_chained) { - desc->valid = 0; - desc->probe_ok = 0; - desc->disable_depth = 0; - desc->chip->unmask(irq); + /* Uninstall ? 
*/ + if (type == NULL || type == &no_irq_type) { + spin_lock_irqsave(&desc->lock, flags); + if (desc->chip) { + desc->chip->mask(irq); + desc->chip->ack(irq); + } + desc->depth = 1; + spin_unlock_irqrestore(&desc->lock, flags); } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -void set_irq_chip(unsigned int irq, struct irqchip *chip) -{ - struct irqdesc *desc; - unsigned long flags; - - if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq); + /* Install the irq_type */ + if (generic_set_irq_type(irq, type)) return; - } - - if (chip == NULL) - chip = &bad_chip; - - desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - desc->chip = chip; - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -int set_irq_type(unsigned int irq, unsigned int type) -{ - struct irqdesc *desc; - unsigned long flags; - int ret = -ENXIO; + spin_lock_irqsave(&desc->lock, flags); + if (is_chained && (desc->handler == &no_irq_type || !desc->chip)) + printk(KERN_WARNING "Trying to install chained interrupt type for IRQ%d\n", irq); - if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); - return -ENODEV; - } - - desc = irq_desc + irq; - if (desc->chip->set_type) { - spin_lock_irqsave(&irq_controller_lock, flags); - ret = desc->chip->set_type(irq, type); - spin_unlock_irqrestore(&irq_controller_lock, flags); + if (type != NULL && is_chained) { + desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; + desc->depth = 0; + if (desc->chip) + desc->chip->unmask(irq); } - - return ret; + spin_unlock_irqrestore(&desc->lock, flags); } -EXPORT_SYMBOL(set_irq_type); void set_irq_flags(unsigned int irq, unsigned int iflags) { @@ -646,400 +174,28 @@ void set_irq_flags(unsigned int irq, uns } desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - desc->valid = (iflags & IRQF_VALID) != 0; - desc->probe_ok = (iflags & IRQF_PROBE) != 0; - desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0; - 
spin_unlock_irqrestore(&irq_controller_lock, flags); -} - -int setup_irq(unsigned int irq, struct irqaction *new) -{ - int shared = 0; - struct irqaction *old, **p; - unsigned long flags; - struct irqdesc *desc; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&irq_controller_lock, flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->probing = 0; - desc->running = 0; - desc->pending = 0; - desc->disable_depth = 1; - if (!desc->noautoenable) { - desc->disable_depth = 0; - desc->chip->unmask(irq); - } - } - - spin_unlock_irqrestore(&irq_controller_lock, flags); - return 0; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. 
Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ -int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irq_flags, const char * devname, void *dev_id) -{ - unsigned long retval; - struct irqaction *action; - - if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler || - (irq_flags & SA_SHIRQ && !dev_id)) - return -EINVAL; - - action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irq_flags; - cpus_clear(action->mask); - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - - if (retval) - kfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. - * - * This function must not be called from interrupt context. 
- */ -void free_irq(unsigned int irq, void *dev_id) -{ - struct irqaction * action, **p; - unsigned long flags; - - if (irq >= NR_IRQS || !irq_desc[irq].valid) { - printk(KERN_ERR "Trying to free IRQ%d\n",irq); - dump_stack(); - return; - } - - spin_lock_irqsave(&irq_controller_lock, flags); - for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { - if (action->dev_id != dev_id) - continue; - - /* Found it - now free it */ - *p = action->next; - break; - } - spin_unlock_irqrestore(&irq_controller_lock, flags); - - if (!action) { - printk(KERN_ERR "Trying to free free IRQ%d\n",irq); - dump_stack(); - } else { - synchronize_irq(irq); - kfree(action); - } -} - -EXPORT_SYMBOL(free_irq); - -static DECLARE_MUTEX(probe_sem); - -/* Start the interrupt probing. Unlike other architectures, - * we don't return a mask of interrupts from probe_irq_on, - * but return the number of interrupts enabled for the probe. - * The interrupts which have been enabled for probing is - * instead recorded in the irq_desc structure. 
- */ -unsigned long probe_irq_on(void) -{ - unsigned int i, irqs = 0; - unsigned long delay; - - down(&probe_sem); - - /* - * first snaffle up any unassigned but - * probe-able interrupts - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (!irq_desc[i].probe_ok || irq_desc[i].action) - continue; - - irq_desc[i].probing = 1; - irq_desc[i].triggered = 0; - if (irq_desc[i].chip->set_type) - irq_desc[i].chip->set_type(i, IRQT_PROBE); - irq_desc[i].chip->unmask(i); - irqs += 1; - } - spin_unlock_irq(&irq_controller_lock); - - /* - * wait for spurious interrupts to mask themselves out again - */ - for (delay = jiffies + HZ/10; time_before(jiffies, delay); ) - /* min 100ms delay */; - - /* - * now filter out any obviously spurious interrupts - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && irq_desc[i].triggered) { - irq_desc[i].probing = 0; - irqs -= 1; - } - } - spin_unlock_irq(&irq_controller_lock); - - return irqs; -} - -EXPORT_SYMBOL(probe_irq_on); - -unsigned int probe_irq_mask(unsigned long irqs) -{ - unsigned int mask = 0, i; - - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < 16 && i < NR_IRQS; i++) - if (irq_desc[i].probing && irq_desc[i].triggered) - mask |= 1 << i; - spin_unlock_irq(&irq_controller_lock); - - up(&probe_sem); - - return mask; -} -EXPORT_SYMBOL(probe_irq_mask); - -/* - * Possible return values: - * >= 0 - interrupt number - * -1 - no interrupt/many interrupts - */ -int probe_irq_off(unsigned long irqs) -{ - unsigned int i; - int irq_found = NO_IRQ; - - /* - * look at the interrupts, and find exactly one - * that we were probing has been triggered - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && - irq_desc[i].triggered) { - if (irq_found != NO_IRQ) { - irq_found = NO_IRQ; - goto out; - } - irq_found = i; - } - } - - if (irq_found == -1) - irq_found = NO_IRQ; -out: - 
spin_unlock_irq(&irq_controller_lock); - - up(&probe_sem); - - return irq_found; -} - -EXPORT_SYMBOL(probe_irq_off); - -#ifdef CONFIG_SMP -static void route_irq(struct irqdesc *desc, unsigned int irq, unsigned int cpu) -{ - pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu); - - spin_lock_irq(&irq_controller_lock); - desc->cpu = cpu; - desc->chip->set_cpu(desc, irq, cpu); - spin_unlock_irq(&irq_controller_lock); -} - -#ifdef CONFIG_PROC_FS -static int -irq_affinity_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct irqdesc *desc = irq_desc + ((int)data); - int len = cpumask_scnprintf(page, count, desc->affinity); - - if (count - len < 2) - return -EINVAL; - page[len++] = '\n'; - page[len] = '\0'; - - return len; -} - -static int -irq_affinity_write_proc(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - unsigned int irq = (unsigned int)data; - struct irqdesc *desc = irq_desc + irq; - cpumask_t affinity, tmp; - int ret = -EIO; - - if (!desc->chip->set_cpu) - goto out; - - ret = cpumask_parse(buffer, count, affinity); - if (ret) - goto out; - - cpus_and(tmp, affinity, cpu_online_map); - if (cpus_empty(tmp)) { - ret = -EINVAL; - goto out; - } - - desc->affinity = affinity; - route_irq(desc, irq, first_cpu(tmp)); - ret = count; - - out: - return ret; -} -#endif -#endif - -void __init init_irq_proc(void) -{ -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) - struct proc_dir_entry *dir; - int irq; - - dir = proc_mkdir("irq", NULL); - if (!dir) - return; - - for (irq = 0; irq < NR_IRQS; irq++) { - struct proc_dir_entry *entry; - struct irqdesc *desc; - char name[16]; - - desc = irq_desc + irq; - memset(name, 0, sizeof(name)); - snprintf(name, sizeof(name) - 1, "%u", irq); - - desc->procdir = proc_mkdir(name, dir); - if (!desc->procdir) - continue; - - entry = create_proc_entry("smp_affinity", 0600, desc->procdir); - if (entry) { - entry->nlink = 1; - entry->data = (void 
*)irq; - entry->read_proc = irq_affinity_read_proc; - entry->write_proc = irq_affinity_write_proc; - } - } -#endif + spin_lock_irqsave(&desc->lock, flags); + desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; + if (iflags & IRQF_VALID) + desc->status &= ~IRQ_NOREQUEST; + if (iflags & IRQF_PROBE) + desc->status &= ~IRQ_NOPROBE; + spin_unlock_irqrestore(&desc->lock, flags); } void __init init_IRQ(void) { - struct irqdesc *desc; extern void init_dma(void); int irq; + for (irq = 0; irq < NR_IRQS; irq++) + irq_desc[irq].status |= IRQ_NOREQUEST; + #ifdef CONFIG_SMP bad_irq_desc.affinity = CPU_MASK_ALL; bad_irq_desc.cpu = smp_processor_id(); #endif - for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) { - *desc = bad_irq_desc; - INIT_LIST_HEAD(&desc->pend); - } - init_arch_irq(); init_dma(); } Index: linux.prev/arch/arm/kernel/process.c =================================================================== --- linux.prev.orig/arch/arm/kernel/process.c +++ linux.prev/arch/arm/kernel/process.c @@ -89,12 +89,12 @@ void default_idle(void) if (hlt_counter) cpu_relax(); else { - local_irq_disable(); + raw_local_irq_disable(); if (!need_resched()) { timer_dyn_reprogram(); arch_idle(); } - local_irq_enable(); + raw_local_irq_enable(); } } @@ -124,8 +124,8 @@ void cpu_idle(void) while (!need_resched()) idle(); leds_event(led_idle_end); - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); } } Index: linux.prev/arch/arm/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/arm/kernel/semaphore.c +++ linux.prev/arch/arm/kernel/semaphore.c @@ -49,14 +49,16 @@ * we cannot lose wakeup events. 
*/ -void __up(struct semaphore *sem) +fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } +EXPORT_SYMBOL(__compat_up); + static DEFINE_SPINLOCK(semaphore_lock); -void __sched __down(struct semaphore * sem) +fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -89,7 +91,9 @@ void __sched __down(struct semaphore * s wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +EXPORT_SYMBOL(__compat_down); + +fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -140,6 +144,8 @@ int __sched __down_interruptible(struct return retval; } +EXPORT_SYMBOL(__compat_down_interruptible); + /* * Trylock failed - make sure we correct for * having decremented the count. @@ -148,7 +154,7 @@ int __sched __down_interruptible(struct * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -int __down_trylock(struct semaphore * sem) +fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; @@ -168,6 +174,15 @@ int __down_trylock(struct semaphore * se return 1; } +EXPORT_SYMBOL(__compat_down_trylock); + +fastcall int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + +EXPORT_SYMBOL(compat_sem_is_locked); + /* * The semaphore operations have a special calling sequence that * allow us to do a simpler in-line version of them. 
These routines @@ -184,7 +199,7 @@ asm(" .section .sched.text,\"ax\",%progb __down_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down \n\ + bl __compat_down \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ .align 5 \n\ @@ -192,7 +207,7 @@ __down_failed: \n\ __down_interruptible_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down_interruptible \n\ + bl __compat_down_interruptible \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ @@ -201,7 +216,7 @@ __down_interruptible_failed: \n\ __down_trylock_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down_trylock \n\ + bl __compat_down_trylock \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ @@ -210,7 +225,7 @@ __down_trylock_failed: \n\ __up_wakeup: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __up \n\ + bl __compat_up \n\ ldmfd sp!, {r0 - r3, pc} \n\ "); Index: linux.prev/arch/arm/kernel/signal.c =================================================================== --- linux.prev.orig/arch/arm/kernel/signal.c +++ linux.prev/arch/arm/kernel/signal.c @@ -628,6 +628,14 @@ static int do_signal(sigset_t *oldset, s siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif + /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux.prev/arch/arm/kernel/smp.c =================================================================== --- linux.prev.orig/arch/arm/kernel/smp.c +++ linux.prev/arch/arm/kernel/smp.c @@ -56,6 +56,7 @@ struct ipi_data { unsigned long bits; }; +/* FIXME */ static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { .lock = SPIN_LOCK_UNLOCKED, }; @@ -348,7 +349,7 @@ static void send_ipi_message(cpumask_t c unsigned long flags; unsigned int cpu; - local_irq_save(flags); + raw_local_irq_save(flags); for_each_cpu_mask(cpu, callmap) { struct ipi_data *ipi = &per_cpu(ipi_data, cpu); @@ -363,7 +364,7 @@ 
static void send_ipi_message(cpumask_t c */ smp_cross_call(callmap); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -520,7 +521,7 @@ static void ipi_call_function(unsigned i cpu_clear(cpu, data->unfinished); } -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_RAW_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() @@ -535,7 +536,7 @@ static void ipi_cpu_stop(unsigned int cp cpu_clear(cpu, cpu_online_map); local_fiq_disable(); - local_irq_disable(); + raw_local_irq_disable(); while (1) cpu_relax(); Index: linux.prev/arch/arm/kernel/traps.c =================================================================== --- linux.prev.orig/arch/arm/kernel/traps.c +++ linux.prev/arch/arm/kernel/traps.c @@ -177,6 +177,8 @@ void dump_stack(void) { #ifdef CONFIG_DEBUG_ERRORS __backtrace(); + print_traces(current); + show_held_locks(current); #endif } @@ -217,7 +219,7 @@ static void __die(const char *str, int e } } -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
@@ -249,7 +251,7 @@ void notify_die(const char *str, struct } static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_RAW_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { @@ -341,7 +343,7 @@ asmlinkage void bad_mode(struct pt_regs handler[reason], processor_modes[proc_mode]); die("Oops - bad mode", regs, 0); - local_irq_disable(); + raw_local_irq_disable(); panic("bad mode"); } Index: linux.prev/arch/arm/mach-clps711x/p720t-leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-clps711x/p720t-leds.c +++ linux.prev/arch/arm/mach-clps711x/p720t-leds.c @@ -36,7 +36,7 @@ static void p720t_leds_event(led_event_t unsigned long flags; u32 pddr; - local_irq_save(flags); + raw_local_irq_save(flags); switch(ledevt) { case led_idle_start: break; @@ -53,7 +53,7 @@ static void p720t_leds_event(led_event_t break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init leds_init(void) Index: linux.prev/arch/arm/mach-clps711x/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-clps711x/time.c +++ linux.prev/arch/arm/mach-clps711x/time.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-clps7500/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-clps7500/core.c +++ linux.prev/arch/arm/mach-clps7500/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include Index: linux.prev/arch/arm/mach-ebsa110/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-ebsa110/core.c +++ linux.prev/arch/arm/mach-ebsa110/core.c @@ -56,14 +56,14 @@ static void __init ebsa110_init_irq(void unsigned long flags; unsigned int irq; - local_irq_save(flags); + raw_local_irq_save(flags); __raw_writeb(0xff, IRQ_MCLR); __raw_writeb(0x55, 
IRQ_MSET); __raw_writeb(0x00, IRQ_MSET); if (__raw_readb(IRQ_MASK) != 0x55) while (1); __raw_writeb(0xff, IRQ_MCLR); /* clear all interrupt enables */ - local_irq_restore(flags); + raw_local_irq_restore(flags); for (irq = 0; irq < NR_IRQS; irq++) { set_irq_chip(irq, &ebsa110_irq_chip); Index: linux.prev/arch/arm/mach-footbridge/dc21285-timer.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/dc21285-timer.c +++ linux.prev/arch/arm/mach-footbridge/dc21285-timer.c @@ -6,6 +6,7 @@ */ #include #include +#include #include Index: linux.prev/arch/arm/mach-footbridge/isa-irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/isa-irq.c +++ linux.prev/arch/arm/mach-footbridge/isa-irq.c @@ -102,6 +102,17 @@ static struct irqaction irq_cascade = { static struct resource pic1_resource = { "pic1", 0x20, 0x3f }; static struct resource pic2_resource = { "pic2", 0xa0, 0xbf }; +static DEFINE_IRQ_CHAINED_TYPE(isa_irq_handler); + +static unsigned int startup_irq_disabled(unsigned int irq) +{ + return 0; +} + +/* Interrupt type for irqs which must not be + * automatically enabled in request_irq */ +static struct irq_type level_type_nostart; + void __init isa_init_irq(unsigned int host_irq) { unsigned int irq; @@ -159,9 +170,11 @@ void __init isa_init_irq(unsigned int ho * There appears to be a missing pull-up * resistor on this line.
*/ - if (machine_is_netwinder()) - set_irq_flags(_ISA_IRQ(11), IRQF_VALID | - IRQF_PROBE | IRQF_NOAUTOEN); + if (machine_is_netwinder()) { + level_type_nostart = default_level_type; + level_type_nostart.startup = startup_irq_disabled; + set_irq_handler(_ISA_IRQ(11), &level_type_nostart); + } } } Index: linux.prev/arch/arm/mach-footbridge/isa-timer.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/isa-timer.c +++ linux.prev/arch/arm/mach-footbridge/isa-timer.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include Index: linux.prev/arch/arm/mach-footbridge/netwinder-hw.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/netwinder-hw.c +++ linux.prev/arch/arm/mach-footbridge/netwinder-hw.c @@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int /* * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE */ -DEFINE_SPINLOCK(gpio_lock); +DEFINE_RAW_SPINLOCK(gpio_lock); static unsigned int current_gpio_op; static unsigned int current_gpio_io; Index: linux.prev/arch/arm/mach-footbridge/netwinder-leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/netwinder-leds.c +++ linux.prev/arch/arm/mach-footbridge/netwinder-leds.c @@ -33,7 +33,7 @@ static char led_state; static char hw_led_state; static DEFINE_SPINLOCK(leds_lock); -extern spinlock_t gpio_lock; +extern raw_spinlock_t gpio_lock; static void netwinder_leds_event(led_event_t evt) { Index: linux.prev/arch/arm/mach-h720x/common.c =================================================================== --- linux.prev.orig/arch/arm/mach-h720x/common.c +++ linux.prev/arch/arm/mach-h720x/common.c @@ -163,6 +163,11 @@ h720x_gpiod_demux_handler(unsigned int i h720x_gpio_handler(mask, irq, desc, regs); } +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioa_demux_handler); +static 
DEFINE_IRQ_CHAINED_TYPE(h720x_gpiob_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioc_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiod_demux_handler); + #ifdef CONFIG_CPU_H7202 static void h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc, @@ -175,6 +180,7 @@ h720x_gpioe_demux_handler(unsigned int i IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); h720x_gpio_handler(mask, irq, desc, regs); } +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioe_demux_handler); #endif static struct irqchip h720x_global_chip = { Index: linux.prev/arch/arm/mach-h720x/cpu-h7202.c =================================================================== --- linux.prev.orig/arch/arm/mach-h720x/cpu-h7202.c +++ linux.prev/arch/arm/mach-h720x/cpu-h7202.c @@ -175,6 +175,8 @@ static struct irqaction h7202_timer_irq .handler = h7202_timer_interrupt, }; +static DEFINE_IRQ_CHAINED_TYPE(h7202_timerx_demux_handler); + /* * Setup TIMER0 as system timer */ Index: linux.prev/arch/arm/mach-imx/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/dma.c +++ linux.prev/arch/arm/mach-imx/dma.c @@ -43,7 +43,7 @@ imx_request_dma(char *name, imx_dma_prio if (!name || !irq_handler) return -EINVAL; - local_irq_save(flags); + raw_local_irq_save(flags); /* try grabbing a DMA channel with the requested priority */ for (i = prio; i < prio + (prio == DMA_PRIO_LOW) ? 
8 : 4; i++) { @@ -75,7 +75,7 @@ imx_request_dma(char *name, imx_dma_prio i = -ENODEV; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return i; } @@ -91,10 +91,10 @@ imx_free_dma(int dma_ch) return; } - local_irq_save(flags); + raw_local_irq_save(flags); DIMR &= ~(1 << dma_ch); dma_channels[dma_ch].name = NULL; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static irqreturn_t Index: linux.prev/arch/arm/mach-imx/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/irq.c +++ linux.prev/arch/arm/mach-imx/irq.c @@ -217,6 +217,11 @@ static struct irqchip imx_gpio_chip = { .set_type = imx_gpio_irq_type, }; +static DEFINE_IRQ_CHAINED_TYPE(imx_gpioa_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpiob_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpioc_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpiod_demux_handler); + void __init imx_init_irq(void) { Index: linux.prev/arch/arm/mach-imx/leds-mx1ads.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/leds-mx1ads.c +++ linux.prev/arch/arm/mach-imx/leds-mx1ads.c @@ -29,7 +29,7 @@ mx1ads_leds_event(led_event_t ledevt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (ledevt) { #ifdef CONFIG_LEDS_CPU @@ -49,5 +49,5 @@ mx1ads_leds_event(led_event_t ledevt) default: break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-imx/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/time.c +++ linux.prev/arch/arm/mach-imx/time.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-integrator/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/core.c +++ linux.prev/arch/arm/mach-integrator/core.c @@ -13,6 +13,7 @@ 
#include #include #include +#include #include #include @@ -117,7 +118,7 @@ arch_initcall(integrator_init); #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET -static DEFINE_SPINLOCK(cm_lock); +static DEFINE_RAW_SPINLOCK(cm_lock); /** * cm_control - update the CM_CTRL register. Index: linux.prev/arch/arm/mach-integrator/leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/leds.c +++ linux.prev/arch/arm/mach-integrator/leds.c @@ -41,7 +41,7 @@ static void integrator_leds_event(led_ev unsigned int update_alpha_leds; // yup, change the LEDs - local_irq_save(flags); + raw_local_irq_save(flags); update_alpha_leds = 0; switch(ledevt) { @@ -76,7 +76,7 @@ static void integrator_leds_event(led_ev while (__raw_readl(dbg_base + INTEGRATOR_DBG_ALPHA_OFFSET) & 1); __raw_writel(saved_leds, dbg_base + INTEGRATOR_DBG_LEDS_OFFSET); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init leds_init(void) Index: linux.prev/arch/arm/mach-integrator/pci_v3.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/pci_v3.c +++ linux.prev/arch/arm/mach-integrator/pci_v3.c @@ -163,7 +163,7 @@ * 7:2 register number * */ -static DEFINE_SPINLOCK(v3_lock); +static DEFINE_RAW_SPINLOCK(v3_lock); #define PCI_BUS_NONMEM_START 0x00000000 #define PCI_BUS_NONMEM_SIZE SZ_256M Index: linux.prev/arch/arm/mach-integrator/platsmp.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/platsmp.c +++ linux.prev/arch/arm/mach-integrator/platsmp.c @@ -31,7 +31,7 @@ extern void integrator_secondary_startup volatile int __cpuinitdata pen_release = -1; unsigned long __cpuinitdata phys_pen_release = 0; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { Index: linux.prev/arch/arm/mach-integrator/time.c 
=================================================================== --- linux.prev.orig/arch/arm/mach-integrator/time.c +++ linux.prev/arch/arm/mach-integrator/time.c @@ -96,7 +96,8 @@ static struct rtc_ops rtc_ops = { .set_alarm = rtc_set_alarm, }; -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { writel(0, rtc_base + RTC_EOI); return IRQ_HANDLED; @@ -124,7 +125,7 @@ static int rtc_probe(struct amba_device xtime.tv_sec = __raw_readl(rtc_base + RTC_DR); - ret = request_irq(dev->irq[0], rtc_interrupt, SA_INTERRUPT, + ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT, "rtc-pl030", dev); if (ret) goto map_out; Index: linux.prev/arch/arm/mach-ixp2000/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/core.c +++ linux.prev/arch/arm/mach-ixp2000/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -276,9 +277,9 @@ void gpio_line_config(int line, int dire { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (direction == GPIO_OUT) { - irq_desc[line + IRQ_IXP2000_GPIO0].valid = 0; + set_irq_flags(line + IRQ_IXP2000_GPIO0, 0); /* if it's an output, it ain't an interrupt anymore */ GPIO_IRQ_falling_edge &= ~(1 << line); @@ -291,7 +292,7 @@ void gpio_line_config(int line, int dire } else if (direction == GPIO_IN) { ixp2000_reg_wrb(IXP2000_GPIO_PDCR, 1 << line); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } @@ -344,8 +345,7 @@ static int ixp2000_GPIO_irq_type(unsigne /* * Finally, mark the corresponding IRQ as valid. 
*/ - irq_desc[irq].valid = 1; - + set_irq_flags(irq, IRQF_VALID); return 0; } @@ -449,6 +449,8 @@ static struct irqchip ixp2000_irq_chip = .unmask = ixp2000_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixp2000_GPIO_irq_handler); + void __init ixp2000_init_irq(void) { int irq; Index: linux.prev/arch/arm/mach-ixp2000/ixdp2x00.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/ixdp2x00.c +++ linux.prev/arch/arm/mach-ixp2000/ixdp2x00.c @@ -146,6 +146,8 @@ static struct irqchip ixdp2x00_cpld_irq_ .unmask = ixdp2x00_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixdp2x00_irq_handler); + void ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs) { unsigned int irq; @@ -168,7 +170,7 @@ void ixdp2x00_init_irq(volatile unsigned } /* Hook into PCI interrupt */ - set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x00_irq_handler); + set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x00_irq_handler); } /************************************************************************* Index: linux.prev/arch/arm/mach-ixp2000/ixdp2x01.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/ixdp2x01.c +++ linux.prev/arch/arm/mach-ixp2000/ixdp2x01.c @@ -95,6 +95,8 @@ static struct irqchip ixdp2x01_irq_chip .unmask = ixdp2x01_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixdp2x01_irq_handler); + /* * We only do anything if we are the master NPU on the board. 
* The slave NPU only has the ethernet chip going directly to @@ -127,7 +129,7 @@ void __init ixdp2x01_init_irq(void) } /* Hook into PCI interrupts */ - set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x01_irq_handler); + set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x01_irq_handler); } Index: linux.prev/arch/arm/mach-ixp2000/pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/pci.c +++ linux.prev/arch/arm/mach-ixp2000/pci.c @@ -145,7 +145,7 @@ int ixp2000_pci_abort_handler(unsigned l pci_master_aborts = 1; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); if (temp & ((1 << 8) | (1 << 5))) { ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp); @@ -158,7 +158,7 @@ int ixp2000_pci_abort_handler(unsigned l temp = *(IXP2000_PCI_CMDSTAT); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * If it was an imprecise abort, then we need to correct the @@ -176,7 +176,7 @@ clear_master_aborts(void) volatile u32 temp; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); if (temp & ((1 << 8) | (1 << 5))) { ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp); @@ -189,7 +189,7 @@ clear_master_aborts(void) temp = *(IXP2000_PCI_CMDSTAT); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-ixp4xx/common-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/common-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/common-pci.c @@ -53,7 +53,7 @@ unsigned long ixp4xx_pci_reg_base = 0; * these transactions are atomic or we will end up * with corrupt data on the bus or in a driver. 
*/ -static DEFINE_SPINLOCK(ixp4xx_pci_lock); +static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock); /* * Read from PCI config space Index: linux.prev/arch/arm/mach-ixp4xx/coyote-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/coyote-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/coyote-pci.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-ixp4xx/ixdp425-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/ixdp425-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/ixdp425-pci.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include Index: linux.prev/arch/arm/mach-ixp4xx/ixdpg425-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/ixdpg425-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/ixdpg425-pci.c @@ -16,10 +16,10 @@ #include #include #include +#include #include #include -#include #include Index: linux.prev/arch/arm/mach-l7200/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-l7200/core.c +++ linux.prev/arch/arm/mach-l7200/core.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include Index: linux.prev/arch/arm/mach-lh7a40x/arch-kev7a400.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/arch-kev7a400.c +++ linux.prev/arch/arm/mach-lh7a40x/arch-kev7a400.c @@ -81,6 +81,8 @@ static void kev7a400_cpld_handler (unsig } } +static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler); + void __init lh7a40x_init_board_irq (void) { int irq; Index: linux.prev/arch/arm/mach-lh7a40x/arch-lpd7a40x.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/arch-lpd7a40x.c +++ linux.prev/arch/arm/mach-lh7a40x/arch-lpd7a40x.c @@ -12,6 +12,7 @@ #include #include #include 
+#include #include #include @@ -173,6 +174,7 @@ static void lpd7a40x_cpld_handler (unsig desc->chip->unmask (irq); /* Level-triggered need this */ } +static DEFINE_IRQ_CHAINED_TYPE(lpd7a40x_cpld_handler); void __init lh7a40x_init_board_irq (void) { Index: linux.prev/arch/arm/mach-lh7a40x/irq-kev7a400.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/irq-kev7a400.c +++ linux.prev/arch/arm/mach-lh7a40x/irq-kev7a400.c @@ -60,6 +60,8 @@ lh7a400_cpld_handler (unsigned int irq, } } +static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler); + /* IRQ initialization */ void __init Index: linux.prev/arch/arm/mach-lh7a40x/irq-lpd7a40x.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/irq-lpd7a40x.c +++ linux.prev/arch/arm/mach-lh7a40x/irq-lpd7a40x.c @@ -71,6 +71,7 @@ static void lh7a40x_cpld_handler (unsign desc->chip->unmask (irq); /* Level-triggered need this */ } +static DEFINE_IRQ_CHAINED_TYPE(lh7a40x_cpld_handler); /* IRQ initialization */ Index: linux.prev/arch/arm/mach-lh7a40x/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/time.c +++ linux.prev/arch/arm/mach-lh7a40x/time.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-omap1/board-osk.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/board-osk.c +++ linux.prev/arch/arm/mach-omap1/board-osk.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include Index: linux.prev/arch/arm/mach-omap1/fpga.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/fpga.c +++ linux.prev/arch/arm/mach-omap1/fpga.c @@ -120,6 +120,8 @@ static struct irqchip omap_fpga_irq = { .unmask = fpga_unmask_irq, }; +static 
DEFINE_IRQ_CHAINED_TYPE(innovator_fpga_IRQ_demux); + /* * All of the FPGA interrupt request inputs except for the touchscreen are * edge-sensitive; the touchscreen is level-sensitive. The edge-sensitive Index: linux.prev/arch/arm/mach-omap1/leds-h2p2-debug.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/leds-h2p2-debug.c +++ linux.prev/arch/arm/mach-omap1/leds-h2p2-debug.c @@ -45,7 +45,7 @@ void h2p2_dbg_leds_event(led_event_t evt static struct h2p2_dbg_fpga __iomem *fpga; static u16 led_state, hw_led_state; - local_irq_save(flags); + raw_local_irq_save(flags); if (!(led_state & LED_STATE_ENABLED) && evt != led_start) goto done; @@ -164,5 +164,5 @@ void h2p2_dbg_leds_event(led_event_t evt __raw_writew(~hw_led_state, &fpga->leds); done: - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-omap1/serial.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/serial.c +++ linux.prev/arch/arm/mach-omap1/serial.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include Index: linux.prev/arch/arm/mach-pxa/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/dma.c +++ linux.prev/arch/arm/mach-pxa/dma.c @@ -43,7 +43,7 @@ int pxa_request_dma (char *name, pxa_dma if (!name || !irq_handler) return -EINVAL; - local_irq_save(flags); + raw_local_irq_save(flags); /* try grabbing a DMA channel with the requested priority */ for (i = prio; i < prio + PXA_DMA_NBCH(prio); i++) { @@ -73,7 +73,7 @@ int pxa_request_dma (char *name, pxa_dma i = -ENODEV; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return i; } @@ -88,10 +88,10 @@ void pxa_free_dma (int dma_ch) return; } - local_irq_save(flags); + raw_local_irq_save(flags); DCSR(dma_ch) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR; dma_channels[dma_ch].name = NULL; - 
local_irq_restore(flags); + raw_local_irq_restore(flags); } static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) Index: linux.prev/arch/arm/mach-pxa/generic.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/generic.c +++ linux.prev/arch/arm/mach-pxa/generic.c @@ -51,7 +51,7 @@ void pxa_gpio_mode(int gpio_mode) int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8; int gafr; - local_irq_save(flags); + raw_local_irq_save(flags); if (gpio_mode & GPIO_DFLT_LOW) GPCR(gpio) = GPIO_bit(gpio); else if (gpio_mode & GPIO_DFLT_HIGH) @@ -62,7 +62,7 @@ void pxa_gpio_mode(int gpio_mode) GPDR(gpio) &= ~GPIO_bit(gpio); gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2)); GAFR(gpio) = gafr | (fn << (((gpio) & 0xf)*2)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(pxa_gpio_mode); @@ -73,14 +73,14 @@ EXPORT_SYMBOL(pxa_gpio_mode); void pxa_set_cken(int clock, int enable) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (enable) CKEN |= clock; else CKEN &= ~clock; - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(pxa_set_cken); Index: linux.prev/arch/arm/mach-pxa/idp.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/idp.c +++ linux.prev/arch/arm/mach-pxa/idp.c @@ -18,6 +18,7 @@ #include #include +#include #include #include Index: linux.prev/arch/arm/mach-pxa/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/irq.c +++ linux.prev/arch/arm/mach-pxa/irq.c @@ -244,6 +244,7 @@ static struct irqchip pxa_muxed_gpio_chi .set_type = pxa_gpio_irq_type, }; +static DEFINE_IRQ_CHAINED_TYPE(pxa_gpio_demux_handler); void __init pxa_init_irq(void) { Index: linux.prev/arch/arm/mach-pxa/leds-idp.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/leds-idp.c +++ 
linux.prev/arch/arm/mach-pxa/leds-idp.c @@ -34,7 +34,7 @@ void idp_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -113,5 +113,5 @@ void idp_leds_event(led_event_t evt) else IDP_CPLD_LED_CONTROL |= IDP_LEDS_MASK; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-pxa/leds-lubbock.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/leds-lubbock.c +++ linux.prev/arch/arm/mach-pxa/leds-lubbock.c @@ -48,7 +48,7 @@ void lubbock_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -122,5 +122,5 @@ void lubbock_leds_event(led_event_t evt) else LUB_DISC_BLNK_LED |= 0xff; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-pxa/leds-mainstone.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/leds-mainstone.c +++ linux.prev/arch/arm/mach-pxa/leds-mainstone.c @@ -43,7 +43,7 @@ void mainstone_leds_event(led_event_t ev { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -117,5 +117,5 @@ void mainstone_leds_event(led_event_t ev else MST_LEDCTRL |= 0xff; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-pxa/lubbock.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/lubbock.c +++ linux.prev/arch/arm/mach-pxa/lubbock.c @@ -52,9 +52,9 @@ void lubbock_set_misc_wr(unsigned int ma { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); LUB_MISC_WR = (LUB_MISC_WR & ~mask) | (set & mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(lubbock_set_misc_wr); @@ -95,6 +95,8 @@ static void lubbock_irq_handler(unsigned 
} while (pending); } +static DEFINE_IRQ_CHAINED_TYPE(lubbock_irq_handler); + static void __init lubbock_init_irq(void) { int irq; Index: linux.prev/arch/arm/mach-pxa/mainstone.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/mainstone.c +++ linux.prev/arch/arm/mach-pxa/mainstone.c @@ -84,6 +84,8 @@ static void mainstone_irq_handler(unsign } while (pending); } +static DEFINE_IRQ_CHAINED_TYPE(mainstone_irq_handler); + static void __init mainstone_init_irq(void) { int irq; Index: linux.prev/arch/arm/mach-rpc/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-rpc/dma.c +++ linux.prev/arch/arm/mach-rpc/dma.c @@ -171,11 +171,11 @@ static void iomd_disable_dma(dmach_t cha unsigned long dma_base = dma->dma_base; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (dma->state != ~DMA_ST_AB) disable_irq(dma->dma_irq); iomd_writeb(0, dma_base + CR); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int iomd_set_dma_speed(dmach_t channel, dma_t *dma, int cycle) Index: linux.prev/arch/arm/mach-rpc/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-rpc/irq.c +++ linux.prev/arch/arm/mach-rpc/irq.c @@ -112,6 +112,15 @@ static struct irqchip iomd_fiq_chip = { .unmask = iomd_unmask_irq_fiq, }; +static unsigned int startup_irq_disabled(unsigned int irq) +{ + return 0; +} + +/* Interrupt type for irqs which must not be + * automatically enabled in request_irq */ +static struct irq_type level_type_nostart; + void __init rpc_init_irq(void) { unsigned int irq, flags; @@ -121,16 +130,15 @@ void __init rpc_init_irq(void) iomd_writeb(0, IOMD_FIQMASK); iomd_writeb(0, IOMD_DMAMASK); + level_type_nostart = default_level_type; + level_type_nostart.startup = startup_irq_disabled; + for (irq = 0; irq < NR_IRQS; irq++) { flags = IRQF_VALID; if (irq <= 6 || (irq >= 9 && irq <= 15)) 
flags |= IRQF_PROBE; - if (irq == 21 || (irq >= 16 && irq <= 19) || - irq == IRQ_KEYBOARDTX) - flags |= IRQF_NOAUTOEN; - switch (irq) { case 0 ... 7: set_irq_chip(irq, &iomd_a_chip); @@ -155,6 +163,10 @@ void __init rpc_init_irq(void) set_irq_flags(irq, IRQF_VALID); break; } + + if (irq == 21 || (irq >= 16 && irq <= 19) || + irq == IRQ_KEYBOARDTX) + set_irq_handler(irq, &level_type_nostart); } init_FIQ(); Index: linux.prev/arch/arm/mach-s3c2410/bast-irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/bast-irq.c +++ linux.prev/arch/arm/mach-s3c2410/bast-irq.c @@ -136,13 +136,15 @@ bast_irq_pc104_demux(unsigned int irq, for (i = 0; stat != 0; i++, stat >>= 1) { if (stat & 1) { irqno = bast_pc104_irqs[i]; - - desc_handle_irq(irqno, irq_desc + irqno, regs); + desc = irq_desc + irqno; + desc_handle_irq(irqno, desc, regs); } } } } +DEFINE_IRQ_CHAINED_TYPE(bast_irq_pc104_demux); + static __init int bast_irq_init(void) { unsigned int i; @@ -156,7 +158,7 @@ static __init int bast_irq_init(void) set_irq_chained_handler(IRQ_ISA, bast_irq_pc104_demux); - /* reigster our IRQs */ + /* register our IRQs */ for (i = 0; i < 4; i++) { unsigned int irqno = bast_pc104_irqs[i]; Index: linux.prev/arch/arm/mach-s3c2410/clock.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/clock.c +++ linux.prev/arch/arm/mach-s3c2410/clock.c @@ -61,7 +61,7 @@ void inline s3c24xx_clk_enable(unsigned unsigned long clkcon; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); clkcon = __raw_readl(S3C2410_CLKCON); clkcon &= ~clocks; @@ -74,7 +74,7 @@ void inline s3c24xx_clk_enable(unsigned __raw_writel(clkcon, S3C2410_CLKCON); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* enable and disable calls for use with the clk struct */ Index: linux.prev/arch/arm/mach-s3c2410/dma.c =================================================================== --- 
linux.prev.orig/arch/arm/mach-s3c2410/dma.c +++ linux.prev/arch/arm/mach-s3c2410/dma.c @@ -329,11 +329,11 @@ static int s3c2410_dma_start(s3c2410_dma pr_debug("s3c2410_start_dma: channel=%d\n", chan->number); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->state == S3C2410_DMA_RUNNING) { pr_debug("s3c2410_start_dma: already running (%d)\n", chan->state); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -348,7 +348,7 @@ static int s3c2410_dma_start(s3c2410_dma printk(KERN_ERR "dma%d: channel has nothing loaded\n", chan->number); chan->state = S3C2410_DMA_IDLE; - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EINVAL; } @@ -385,7 +385,7 @@ static int s3c2410_dma_start(s3c2410_dma dbg_showchan(chan); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -451,7 +451,7 @@ int s3c2410_dma_enqueue(unsigned int cha buf->id = id; buf->magic = BUF_MAGIC; - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->curr == NULL) { /* we've got nothing loaded... 
*/ @@ -485,7 +485,7 @@ int s3c2410_dma_enqueue(unsigned int cha "timeout loading buffer\n", chan->number); dbg_showchan(chan); - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EINVAL; } } @@ -499,7 +499,7 @@ int s3c2410_dma_enqueue(unsigned int cha } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -661,9 +661,9 @@ s3c2410_dma_irq(int irq, void *devpw, st return IRQ_HANDLED; } - local_irq_save(flags); + raw_local_irq_save(flags); s3c2410_dma_loadbuffer(chan, chan->next); - local_irq_restore(flags); + raw_local_irq_restore(flags); } else { s3c2410_dma_lastxfer(chan); @@ -698,14 +698,14 @@ int s3c2410_dma_request(unsigned int cha check_channel(channel); - local_irq_save(flags); + raw_local_irq_save(flags); dbg_showchan(chan); if (chan->in_use) { if (client != chan->client) { printk(KERN_ERR "dma%d: already in use\n", channel); - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EBUSY; } else { printk(KERN_ERR "dma%d: client already has channel\n", channel); @@ -724,7 +724,7 @@ int s3c2410_dma_request(unsigned int cha if (err) { chan->in_use = 0; - local_irq_restore(flags); + raw_local_irq_restore(flags); printk(KERN_ERR "%s: cannot get IRQ %d for DMA %d\n", client->name, chan->irq, chan->number); @@ -735,7 +735,7 @@ int s3c2410_dma_request(unsigned int cha chan->irq_enabled = 1; } - local_irq_restore(flags); + raw_local_irq_restore(flags); /* need to setup */ @@ -764,7 +764,7 @@ int s3c2410_dma_free(dmach_t channel, s3 check_channel(channel); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->client != client) { @@ -789,7 +789,7 @@ int s3c2410_dma_free(dmach_t channel, s3 free_irq(chan->irq, (void *)chan); chan->irq_claimed = 0; - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -805,7 +805,7 @@ static int s3c2410_dma_dostop(s3c2410_dm dbg_showchan(chan); - local_irq_save(flags); + raw_local_irq_save(flags); s3c2410_dma_call_op(chan, S3C2410_DMAOP_STOP); @@ -823,7 
+823,7 @@ static int s3c2410_dma_dostop(s3c2410_dm chan->state = S3C2410_DMA_IDLE; chan->load_state = S3C2410_DMALOAD_NONE; - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -840,7 +840,7 @@ static int s3c2410_dma_flush(s3c2410_dma pr_debug("%s:\n", __FUNCTION__); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->state != S3C2410_DMA_IDLE) { pr_debug("%s: stopping channel...\n", __FUNCTION__ ); @@ -865,7 +865,7 @@ static int s3c2410_dma_flush(s3c2410_dma } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-s3c2410/gpio.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/gpio.c +++ linux.prev/arch/arm/mach-s3c2410/gpio.c @@ -80,7 +80,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi /* modify the specified register wwith IRQs off */ - local_irq_save(flags); + raw_local_irq_save(flags); con = __raw_readl(base + 0x00); con &= ~mask; @@ -88,7 +88,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi __raw_writel(con, base + 0x00); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_cfgpin); @@ -119,14 +119,14 @@ void s3c2410_gpio_pullup(unsigned int pi if (pin < S3C2410_GPIO_BANKB) return; - local_irq_save(flags); + raw_local_irq_save(flags); up = __raw_readl(base + 0x08); up &= ~(1L << offs); up |= to << offs; __raw_writel(up, base + 0x08); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_pullup); @@ -138,14 +138,14 @@ void s3c2410_gpio_setpin(unsigned int pi unsigned long flags; unsigned long dat; - local_irq_save(flags); + raw_local_irq_save(flags); dat = __raw_readl(base + 0x04); dat &= ~(1 << offs); dat |= to << offs; __raw_writel(dat, base + 0x04); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_setpin); @@ -165,12 +165,12 @@ unsigned int s3c2410_modify_misccr(unsig unsigned long flags; unsigned long misccr; 
- local_irq_save(flags); + raw_local_irq_save(flags); misccr = __raw_readl(S3C2410_MISCCR); misccr &= ~clear; misccr ^= change; __raw_writel(misccr, S3C2410_MISCCR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return misccr; } @@ -211,7 +211,7 @@ int s3c2410_gpio_irqfilter(unsigned int pin -= S3C2410_GPG8_EINT16; reg += pin & ~3; - local_irq_save(flags); + raw_local_irq_save(flags); /* update filter width and clock source */ @@ -227,7 +227,7 @@ int s3c2410_gpio_irqfilter(unsigned int val |= on << ((pin * 4) + 3); __raw_writel(val, S3C2410_EXTINT2); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-s3c2410/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/irq.c +++ linux.prev/arch/arm/mach-s3c2410/irq.c @@ -573,6 +573,11 @@ s3c_irq_demux_uart2(unsigned int irq, } +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart0); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart1); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart2); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_adc); + /* s3c24xx_init_irq * * Initialise S3C2410 IRQ system Index: linux.prev/arch/arm/mach-s3c2410/s3c2440-dsc.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/s3c2440-dsc.c +++ linux.prev/arch/arm/mach-s3c2410/s3c2440-dsc.c @@ -45,14 +45,14 @@ int s3c2440_set_dsc(unsigned int pin, un base = (pin & S3C2440_SELECT_DSC1) ? 
S3C2440_DSC1 : S3C2440_DSC0; mask = 3 << S3C2440_DSC_GETSHIFT(pin); - local_irq_save(flags); + raw_local_irq_save(flags); val = __raw_readl(base); val &= ~mask; val |= value & mask; __raw_writel(val, base); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-s3c2410/s3c2440-irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/s3c2440-irq.c +++ linux.prev/arch/arm/mach-s3c2410/s3c2440-irq.c @@ -157,6 +157,9 @@ static struct irqchip s3c_irq_cam = { .ack = s3c_irq_cam_ack, }; +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_wdtac97); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_cam); + static int s3c2440_irq_add(struct sys_device *sysdev) { unsigned int irqno; Index: linux.prev/arch/arm/mach-s3c2410/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/time.c +++ linux.prev/arch/arm/mach-s3c2410/time.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-sa1100/assabet.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/assabet.c +++ linux.prev/arch/arm/mach-sa1100/assabet.c @@ -61,10 +61,10 @@ void ASSABET_BCR_frob(unsigned int mask, { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); BCR_value = (BCR_value & ~mask) | val; ASSABET_BCR = BCR_value; - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(ASSABET_BCR_frob); Index: linux.prev/arch/arm/mach-sa1100/badge4.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/badge4.c +++ linux.prev/arch/arm/mach-sa1100/badge4.c @@ -227,7 +227,7 @@ void badge4_set_5V(unsigned subsystem, i unsigned long flags; unsigned old_5V_bitmap; - local_irq_save(flags); + raw_local_irq_save(flags); old_5V_bitmap = badge4_5V_bitmap; @@ -240,15 +240,22 @@ 
void badge4_set_5V(unsigned subsystem, i /* detect on->off and off->on transitions */ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { /* was off, now on */ - printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); GPSR = BADGE4_GPIO_PCMEN5V; } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { /* was on, now off */ - printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); GPCR = BADGE4_GPIO_PCMEN5V; } - local_irq_restore(flags); + raw_local_irq_restore(flags); + + /* detect on->off and off->on transitions */ + if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { + /* was off, now on */ + printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); + } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { + /* was on, now off */ + printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); + } } EXPORT_SYMBOL(badge4_set_5V); Index: linux.prev/arch/arm/mach-sa1100/cerf.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/cerf.c +++ linux.prev/arch/arm/mach-sa1100/cerf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-sa1100/cpu-sa1110.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/cpu-sa1110.c +++ linux.prev/arch/arm/mach-sa1100/cpu-sa1110.c @@ -282,7 +282,7 @@ static int sa1110_target(struct cpufreq_ * This means that we won't access SDRAM for the duration of * the programming. */ - local_irq_save(flags); + raw_local_irq_save(flags); asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)); udelay(10); __asm__ __volatile__(" \n\ @@ -303,7 +303,7 @@ static int sa1110_target(struct cpufreq_ : "r" (&MDCNFG), "r" (&PPCR), "0" (sd.mdcnfg), "r" (sd.mdrefr), "r" (sd.mdcas[0]), "r" (sd.mdcas[1]), "r" (sd.mdcas[2]), "r" (ppcr)); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * Now, return the SDRAM refresh back to normal. 
Index: linux.prev/arch/arm/mach-sa1100/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/dma.c +++ linux.prev/arch/arm/mach-sa1100/dma.c @@ -227,7 +227,7 @@ int sa1100_start_dma(dma_regs_t *regs, d if (size > MAX_DMA_SIZE) return -EOVERFLOW; - local_irq_save(flags); + raw_local_irq_save(flags); status = regs->RdDCSR; /* If both DMA buffers are started, there's nothing else we can do. */ @@ -262,7 +262,7 @@ int sa1100_start_dma(dma_regs_t *regs, d ret = 0; out: - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } Index: linux.prev/arch/arm/mach-sa1100/generic.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/generic.c +++ linux.prev/arch/arm/mach-sa1100/generic.c @@ -138,7 +138,7 @@ unsigned long long sched_clock(void) static void sa1100_power_off(void) { mdelay(100); - local_irq_disable(); + raw_local_irq_disable(); /* disable internal oscillator, float CS lines */ PCFR = (PCFR_OPDE | PCFR_FP | PCFR_FS); /* enable wake-up on GPIO0 (Assabet...) 
*/ @@ -411,7 +411,7 @@ void __init sa1110_mb_disable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); PGSR &= ~GPIO_MBGNT; GPCR = GPIO_MBGNT; @@ -419,7 +419,7 @@ void __init sa1110_mb_disable(void) GAFR &= ~(GPIO_MBGNT | GPIO_MBREQ); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -430,7 +430,7 @@ void __init sa1110_mb_enable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); PGSR &= ~GPIO_MBGNT; GPCR = GPIO_MBGNT; @@ -439,6 +439,6 @@ void __init sa1110_mb_enable(void) GAFR |= (GPIO_MBGNT | GPIO_MBREQ); TUCR |= TUCR_MR; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/h3600.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/h3600.c +++ linux.prev/arch/arm/mach-sa1100/h3600.c @@ -331,7 +331,7 @@ static void h3100_control_egpio(enum ipa } if (egpio || gpio) { - local_irq_save(flags); + raw_local_irq_save(flags); if (setp) { h3100_egpio |= egpio; GPSR = gpio; @@ -340,7 +340,7 @@ static void h3100_control_egpio(enum ipa GPCR = gpio; } H3100_EGPIO = h3100_egpio; - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -463,13 +463,13 @@ static void h3600_control_egpio(enum ipa } if (egpio) { - local_irq_save(flags); + raw_local_irq_save(flags); if (setp) h3600_egpio |= egpio; else h3600_egpio &= ~egpio; H3600_EGPIO = h3600_egpio; - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -800,6 +800,8 @@ static void h3800_unmask_gpio_irq(unsign H3800_ASIC2_GPIINTSTAT |= mask; } +static DEFINE_IRQ_CHAINED_TYPE(h3800_IRQ_demux); + static void __init h3800_init_irq(void) { int i; @@ -838,7 +840,7 @@ static void __init h3800_init_irq(void) } #endif set_irq_type(IRQ_GPIO_H3800_ASIC, IRQT_RISING); - set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, &h3800_IRQ_demux); + set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, h3800_IRQ_demux); } Index: 
linux.prev/arch/arm/mach-sa1100/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/irq.c +++ linux.prev/arch/arm/mach-sa1100/irq.c @@ -11,12 +11,13 @@ */ #include #include +#include +#include #include #include #include #include -#include #include #include "generic.h" @@ -281,6 +282,8 @@ static int __init sa1100irq_init_devicef return sysdev_register(&sa1100irq_device); } +static DEFINE_IRQ_CHAINED_TYPE(sa1100_high_gpio_handler); + device_initcall(sa1100irq_init_devicefs); void __init sa1100_init_irq(void) Index: linux.prev/arch/arm/mach-sa1100/leds-assabet.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-assabet.c +++ linux.prev/arch/arm/mach-sa1100/leds-assabet.c @@ -32,7 +32,7 @@ void assabet_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -111,5 +111,5 @@ void assabet_leds_event(led_event_t evt) if (led_state & LED_STATE_ENABLED) ASSABET_BCR_frob(ASSABET_BCR_LED_MASK, hw_led_state); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-badge4.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-badge4.c +++ linux.prev/arch/arm/mach-sa1100/leds-badge4.c @@ -36,7 +36,7 @@ void badge4_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -108,5 +108,5 @@ void badge4_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-cerf.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-cerf.c +++ linux.prev/arch/arm/mach-sa1100/leds-cerf.c @@ -29,7 +29,7 @@ void 
cerf_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -107,5 +107,5 @@ void cerf_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-hackkit.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-hackkit.c +++ linux.prev/arch/arm/mach-sa1100/leds-hackkit.c @@ -33,7 +33,7 @@ void hackkit_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch(evt) { case led_start: @@ -109,5 +109,5 @@ void hackkit_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-lart.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-lart.c +++ linux.prev/arch/arm/mach-sa1100/leds-lart.c @@ -32,7 +32,7 @@ void lart_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch(evt) { case led_start: @@ -98,5 +98,5 @@ void lart_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/neponset.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/neponset.c +++ linux.prev/arch/arm/mach-sa1100/neponset.c @@ -137,6 +137,8 @@ static struct sa1100_port_fns neponset_p .get_mctrl = neponset_get_mctrl, }; +static DEFINE_IRQ_CHAINED_TYPE(neponset_irq_handler); + static int neponset_probe(struct platform_device *dev) { sa1100_register_uart_fns(&neponset_port_fns); Index: linux.prev/arch/arm/mach-sa1100/pleb.c =================================================================== --- 
linux.prev.orig/arch/arm/mach-sa1100/pleb.c +++ linux.prev/arch/arm/mach-sa1100/pleb.c @@ -7,6 +7,7 @@ #include #include #include +#include #include Index: linux.prev/arch/arm/mach-sa1100/simpad.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/simpad.c +++ linux.prev/arch/arm/mach-sa1100/simpad.c @@ -174,7 +174,7 @@ static void __init simpad_map_io(void) static void simpad_power_off(void) { - local_irq_disable(); // was cli + raw_local_irq_disable(); // was cli set_cs3(0x800); /* only SD_MEDIAQ */ /* disable internal oscillator, float CS lines */ @@ -191,7 +191,7 @@ static void simpad_power_off(void) PMCR = PMCR_SF; while(1); - local_irq_enable(); /* we won't ever call it */ + raw_local_irq_enable(); /* we won't ever call it */ } Index: linux.prev/arch/arm/mach-sa1100/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/time.c +++ linux.prev/arch/arm/mach-sa1100/time.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-shark/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-shark/core.c +++ linux.prev/arch/arm/mach-shark/core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-shark/leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-shark/leds.c +++ linux.prev/arch/arm/mach-shark/leds.c @@ -33,7 +33,7 @@ static char led_state; static short hw_led_state; static short saved_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_RAW_SPINLOCK(leds_lock); short sequoia_read(int addr) { outw(addr,0x24); Index: linux.prev/arch/arm/mach-versatile/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-versatile/core.c +++ linux.prev/arch/arm/mach-versatile/core.c @@ -113,6 
+113,8 @@ sic_handle_irq(unsigned int irq, struct } while (status); } +static DEFINE_IRQ_CHAINED_TYPE(sic_handle_irq); + #if 1 #define IRQ_MMCI0A IRQ_VICSOURCE22 #define IRQ_AACI IRQ_VICSOURCE24 @@ -162,7 +164,7 @@ void __init versatile_init_irq(void) } } - set_irq_handler(IRQ_VICSOURCE31, sic_handle_irq); + set_irq_chained_handler(IRQ_VICSOURCE31, sic_handle_irq); vic_unmask_irq(IRQ_VICSOURCE31); /* Do second interrupt controller */ @@ -785,7 +787,7 @@ static void versatile_leds_event(led_eve unsigned long flags; u32 val; - local_irq_save(flags); + raw_local_irq_save(flags); val = readl(VA_LEDS_BASE); switch (ledevt) { @@ -810,7 +812,7 @@ static void versatile_leds_event(led_eve } writel(val, VA_LEDS_BASE); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_LEDS */ Index: linux.prev/arch/arm/mm/consistent.c =================================================================== --- linux.prev.orig/arch/arm/mm/consistent.c +++ linux.prev/arch/arm/mm/consistent.c @@ -30,7 +30,7 @@ * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. Index: linux.prev/arch/arm/mm/copypage-v4mc.c =================================================================== --- linux.prev.orig/arch/arm/mm/copypage-v4mc.c +++ linux.prev/arch/arm/mm/copypage-v4mc.c @@ -29,7 +29,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_page @@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(minicache_lock); * instruction. If your processor does not supply this, you have to write your * own copy_user_page that does the right thing. 
*/ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { asm volatile( @@ -82,7 +82,7 @@ void v4_mc_copy_user_page(void *kto, con /* * ARMv4 optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) v4_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux.prev/arch/arm/mm/copypage-v6.c =================================================================== --- linux.prev.orig/arch/arm/mm/copypage-v6.c +++ linux.prev/arch/arm/mm/copypage-v6.c @@ -26,7 +26,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_RAW_SPINLOCK(v6_lock); /* * Copy the user page. No aliasing to deal with so we can just Index: linux.prev/arch/arm/mm/copypage-xscale.c =================================================================== --- linux.prev.orig/arch/arm/mm/copypage-xscale.c +++ linux.prev/arch/arm/mm/copypage-xscale.c @@ -31,7 +31,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_page @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(minicache_lock); * Dcache aliasing issue. The writes will be forwarded to the write buffer, * and merged as appropriate. 
*/ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { /* @@ -104,7 +104,7 @@ void xscale_mc_copy_user_page(void *kto, /* * XScale optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux.prev/arch/arm/mm/fault-armv.c =================================================================== --- linux.prev.orig/arch/arm/mm/fault-armv.c +++ linux.prev/arch/arm/mm/fault-armv.c @@ -166,7 +166,7 @@ static int __init check_writebuffer(unsi { register unsigned long zero = 0, one = 1, val; - local_irq_disable(); + raw_local_irq_disable(); mb(); *p1 = one; mb(); @@ -174,7 +174,7 @@ static int __init check_writebuffer(unsi mb(); val = *p1; mb(); - local_irq_enable(); + raw_local_irq_enable(); return val != zero; } Index: linux.prev/arch/arm/mm/fault.c =================================================================== --- linux.prev.orig/arch/arm/mm/fault.c +++ linux.prev/arch/arm/mm/fault.c @@ -216,7 +216,7 @@ out: return fault; } -static int +static notrace int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk; @@ -316,7 +316,7 @@ no_context: * interrupt or a critical region, and should only copy the information * from the master page table, nothing more. */ -static int +static notrace int do_translation_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -362,7 +362,7 @@ bad_area: * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ -static int +static notrace int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk = current; @@ -373,7 +373,7 @@ do_sect_fault(unsigned long addr, unsign /* * This abort handler always returns "fault". 
*/ -static int +static notrace int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { return 1; @@ -428,7 +428,7 @@ static struct fsr_info { { do_bad, SIGBUS, 0, "unknown 31" } }; -void __init +void __init notrace hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, const char *name) { @@ -442,7 +442,7 @@ hook_fault_code(int nr, int (*fn)(unsign /* * Dispatch a data abort to the relevant handler. */ -asmlinkage void +asmlinkage notrace void do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); @@ -461,7 +461,7 @@ do_DataAbort(unsigned long addr, unsigne notify_die("", regs, &info, fsr, 0); } -asmlinkage void +asmlinkage notrace void do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) { do_translation_fault(addr, 0, regs); Index: linux.prev/arch/arm/mm/init.c =================================================================== --- linux.prev.orig/arch/arm/mm/init.c +++ linux.prev/arch/arm/mm/init.c @@ -28,7 +28,7 @@ #define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t)) -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end; Index: linux.prev/arch/arm/plat-omap/clock.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/clock.c +++ linux.prev/arch/arm/plat-omap/clock.c @@ -28,7 +28,7 @@ LIST_HEAD(clocks); static DECLARE_MUTEX(clocks_sem); -DEFINE_SPINLOCK(clockfw_lock); +DEFINE_RAW_SPINLOCK(clockfw_lock); static struct clk_functions *arch_clock; Index: linux.prev/arch/arm/plat-omap/dma.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/dma.c +++ linux.prev/arch/arm/plat-omap/dma.c @@ -557,7 +557,7 @@ void omap_clear_dma(int 
lch) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (cpu_class_is_omap1()) { int status; @@ -574,7 +574,7 @@ void omap_clear_dma(int lch) omap_writel(0, lch_base + i); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void omap_start_dma(int lch) @@ -903,7 +903,7 @@ static struct irqaction omap24xx_dma_irq /*----------------------------------------------------------------------------*/ static struct lcd_dma_info { - spinlock_t lock; + raw_spinlock_t lock; int reserved; void (* callback)(u16 status, void *data); void *cb_data; Index: linux.prev/arch/arm/plat-omap/gpio.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/gpio.c +++ linux.prev/arch/arm/plat-omap/gpio.c @@ -121,7 +121,7 @@ struct gpio_bank { u32 reserved_map; u32 suspend_wakeup; u32 saved_wakeup; - spinlock_t lock; + raw_spinlock_t lock; }; #define METHOD_MPUIO 0 @@ -736,7 +736,7 @@ static void gpio_irq_handler(unsigned in desc->chip->ack(irq); - bank = (struct gpio_bank *) desc->data; + bank = (struct gpio_bank *) desc->handler_data; if (bank->method == METHOD_MPUIO) isr_reg = bank->base + OMAP_MPUIO_GPIO_INT; #ifdef CONFIG_ARCH_OMAP15XX @@ -837,6 +837,8 @@ static struct irqchip mpuio_irq_chip = { .unmask = mpuio_unmask_irq }; +static DEFINE_IRQ_CHAINED_TYPE(gpio_irq_handler); + static int initialized; static struct clk * gpio_ick; static struct clk * gpio_fck; Index: linux.prev/arch/arm/plat-omap/mux.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/mux.c +++ linux.prev/arch/arm/plat-omap/mux.c @@ -57,7 +57,7 @@ int __init omap_mux_register(struct pin_ */ int __init_or_module omap_cfg_reg(const unsigned long index) { - static DEFINE_SPINLOCK(mux_spin_lock); + static DEFINE_RAW_SPINLOCK(mux_spin_lock); unsigned long flags; struct pin_config *cfg; Index: linux.prev/arch/arm/plat-omap/pm.c 
=================================================================== --- linux.prev.orig/arch/arm/plat-omap/pm.c +++ linux.prev/arch/arm/plat-omap/pm.c @@ -82,11 +82,11 @@ void omap_pm_idle(void) * seconds for wait for interrupt. */ - local_irq_disable(); + raw_local_irq_disable(); local_fiq_disable(); if (need_resched()) { local_fiq_enable(); - local_irq_enable(); + raw_local_irq_enable(); return; } mask32 = omap_readl(ARM_SYSST); @@ -111,7 +111,7 @@ void omap_pm_idle(void) omap_sram_idle(); local_fiq_enable(); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -182,7 +182,7 @@ void omap_pm_suspend(void) * Step 1: turn off interrupts (FIXME: NOTE: already disabled) */ - local_irq_disable(); + raw_local_irq_disable(); local_fiq_disable(); /* @@ -335,7 +335,7 @@ void omap_pm_suspend(void) * Reenable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); local_fiq_enable(); omap_serial_wake_trigger(0); Index: linux.prev/arch/arm26/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/arm26/boot/compressed/misc.c +++ linux.prev/arch/arm26/boot/compressed/misc.c @@ -184,6 +184,7 @@ static ulg free_mem_ptr_end; #define HEAP_SIZE 0x2000 +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" #ifndef STANDALONE_DEBUG Index: linux.prev/arch/i386/Kconfig =================================================================== --- linux.prev.orig/arch/i386/Kconfig +++ linux.prev/arch/i386/Kconfig @@ -14,6 +14,10 @@ config X86_32 486, 586, Pentiums, and various instruction-set-compatible chips by AMD, Cyrix, and others. 
+config GENERIC_TIME + bool + default y + config SEMAPHORE_SLEEPERS bool default y @@ -173,6 +177,8 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && RTC=y default y +source "kernel/time/Kconfig" + config SMP bool "Symmetric multi-processing support" ---help--- @@ -228,6 +234,19 @@ config SCHED_SMT source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + default y if !RWSEM_GENERIC_SPINLOCK + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || X86_VOYAGER) @@ -619,7 +638,7 @@ config BOOT_IOREMAP config REGPARM bool "Use register arguments (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on EXPERIMENTAL && !MCOUNT default n help Compile the kernel with -mregparm=3. This uses a different ABI @@ -1055,3 +1074,7 @@ config X86_TRAMPOLINE bool depends on X86_SMP || (X86_VOYAGER && SMP) default y + +config KTIME_SCALAR + bool + default y Index: linux.prev/arch/i386/Kconfig.cpu =================================================================== --- linux.prev.orig/arch/i386/Kconfig.cpu +++ linux.prev/arch/i386/Kconfig.cpu @@ -229,11 +229,6 @@ config RWSEM_GENERIC_SPINLOCK depends on M386 default y -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - config GENERIC_CALIBRATE_DELAY bool default y Index: linux.prev/arch/i386/Kconfig.debug =================================================================== --- linux.prev.orig/arch/i386/Kconfig.debug +++ linux.prev/arch/i386/Kconfig.debug @@ -18,6 +18,7 @@ config EARLY_PRINTK config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + default y help This option will cause messages to be printed if free stack space drops below a certain limit. 
@@ -25,6 +26,7 @@ config DEBUG_STACKOVERFLOW config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL + default y help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. Index: linux.prev/arch/i386/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/i386/boot/compressed/misc.c +++ linux.prev/arch/i386/boot/compressed/misc.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_MCOUNT +void notrace mcount(void) +{ +} +#endif + /* * gzip declarations */ @@ -112,7 +118,7 @@ static long free_mem_end_ptr; #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_MAX 0x90000 -#define HEAP_SIZE 0x3000 +#define HEAP_SIZE 0x4000 static unsigned int low_buffer_end, low_buffer_size; static int high_loaded =0; static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; @@ -125,6 +131,7 @@ static int lines, cols; static void * xquad_portio = NULL; #endif +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" static void *malloc(int size) Index: linux.prev/arch/i386/kernel/Makefile =================================================================== --- linux.prev.orig/arch/i386/kernel/Makefile +++ linux.prev/arch/i386/kernel/Makefile @@ -4,13 +4,13 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o i8237.o + doublefault.o quirks.o i8237.o i8253.o tsc.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-y += cpu/ -obj-y += timers/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o @@ -20,6 +20,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o 
obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o @@ -34,6 +35,8 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_SYSFS) += switch2poll.o +obj-$(CONFIG_HPET_TIMER) += hpet.o EXTRA_AFLAGS := -traditional Index: linux.prev/arch/i386/kernel/acpi/boot.c =================================================================== --- linux.prev.orig/arch/i386/kernel/acpi/boot.c +++ linux.prev/arch/i386/kernel/acpi/boot.c @@ -567,7 +567,7 @@ static int __init acpi_parse_sbf(unsigne } #ifdef CONFIG_HPET_TIMER - +#include static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) { struct acpi_table_hpet *hpet_tbl; @@ -589,6 +589,7 @@ static int __init acpi_parse_hpet(unsign #ifdef CONFIG_X86_64 vxtime.hpet_address = hpet_tbl->addr.addrl | ((long)hpet_tbl->addr.addrh << 32); + hpet_address = vxtime.hpet_address; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, vxtime.hpet_address); @@ -597,10 +598,10 @@ static int __init acpi_parse_hpet(unsign extern unsigned long hpet_address; hpet_address = hpet_tbl->addr.addrl; - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); } -#endif /* X86 */ +#endif /* X86 */ + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, hpet_address); return 0; } @@ -608,9 +609,8 @@ static int __init acpi_parse_hpet(unsign #define acpi_parse_hpet NULL #endif -#ifdef CONFIG_X86_PM_TIMER -extern u32 pmtmr_ioport; -#endif +u32 acpi_pmtmr_ioport; +int acpi_pmtmr_buggy; static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { @@ -629,7 +629,6 @@ static int __init acpi_parse_fadt(unsign 
acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode; -#ifdef CONFIG_X86_PM_TIMER /* detect the location of the ACPI PM Timer */ if (fadt->revision >= FADT2_REVISION_ID) { /* FADT rev. 2 */ @@ -637,22 +636,22 @@ static int __init acpi_parse_fadt(unsign ACPI_ADR_SPACE_SYSTEM_IO) return 0; - pmtmr_ioport = fadt->xpm_tmr_blk.address; + acpi_pmtmr_ioport = fadt->xpm_tmr_blk.address; /* * "X" fields are optional extensions to the original V1.0 * fields, so we must selectively expand V1.0 fields if the * corresponding X field is zero. */ - if (!pmtmr_ioport) - pmtmr_ioport = fadt->V1_pm_tmr_blk; + if (!acpi_pmtmr_ioport) + acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk; } else { /* FADT rev. 1 */ - pmtmr_ioport = fadt->V1_pm_tmr_blk; + acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk; } - if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", - pmtmr_ioport); -#endif + + if (acpi_pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", acpi_pmtmr_ioport); + return 0; } Index: linux.prev/arch/i386/kernel/apic.c =================================================================== --- linux.prev.orig/arch/i386/kernel/apic.c +++ linux.prev/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,23 @@ int enable_local_apic __initdata = 0; /* */ int apic_verbosity; +static unsigned int calibration_result; + +static void lapic_next_event(unsigned long evt); +static void lapic_timer_setup(int mode); + +static struct clock_event lapic_clockevent = { + .name = "lapic", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_HAS_IRQHANDLER +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, +}; static void apic_pm_activate(void); @@ -92,10 +110,6 @@ void __init apic_intr_init(void) /* Using APIC to generate smp_local_timer_interrupt? 
*/ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static int enabled_via_apicbase; void enable_NMI_through_LVT0 (void * dummy) @@ -567,13 +581,13 @@ void lapic_shutdown(void) if (!cpu_has_apic) return; - local_irq_disable(); + raw_local_irq_disable(); clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_PM @@ -617,9 +631,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_irq_save(flags); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -631,7 +645,7 @@ static int lapic_resume(struct sys_devic if (!apic_pm_state.active) return 0; - local_irq_save(flags); + raw_local_irq_save(flags); /* * Make sure the APICBASE points to the right address @@ -662,7 +676,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -875,6 +889,11 @@ fake_ioapic_page: */ /* + * FIXME: Move this to i8253.h. There is no need to keep the access to + * the PIT scattered all around the place -tglx + */ + +/* * The timer chip is already set up at HZ interrupts per second here, * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. 
@@ -932,12 +951,16 @@ void (*wait_timer_tick)(void) __devinitd #define APIC_DIVISOR 16 -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot) { unsigned int lvtt_value, tmp_value, ver; ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); @@ -950,23 +973,27 @@ static void __setup_APIC_LVTT(unsigned i & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void __devinit setup_APIC_timer(unsigned int clocks) +static void lapic_next_event(unsigned long evt) { - unsigned long flags; - - local_irq_save(flags); + apic_write_around(APIC_TMICT, evt); +} - /* - * Wait for IRQ0's slice: - */ - wait_timer_tick(); +static void lapic_timer_setup(int mode) +{ + unsigned long flags; - __setup_APIC_LVTT(clocks); + raw_local_irq_save(flags); + __setup_APIC_LVTT(calibration_result, mode == CLOCK_EVT_ONESHOT); + raw_local_irq_restore(flags); +} - local_irq_restore(flags); +static void __devinit setup_APIC_timer(void) +{ + setup_local_clockevent(&lapic_clockevent, CPU_MASK_NONE); } /* @@ -975,6 +1002,8 @@ static void __devinit setup_APIC_timer(u * to calibrate, since some later bootup code depends on getting * the first irq? Ugh. * + * TODO: Fix this rather than saying "Ugh" -tglx + * * We want to do the calibration only once since we * want to have local timer irqs syncron. CPUs connected * by the same APIC bus have the very same bus frequency. @@ -997,7 +1026,7 @@ static int __init calibrate_APIC_clock(v * value into the APIC clock, we just want to get the * counter running for calibration. 
*/ - __setup_APIC_LVTT(1000000000); + __setup_APIC_LVTT(1000000000, 0); /* * The timer chip counts down to zero. Let's wait @@ -1034,6 +1063,13 @@ static int __init calibrate_APIC_clock(v result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc32(tt1-tt2, TICK_NSEC * LOOPS); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + if (cpu_has_tsc) apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", @@ -1048,28 +1084,26 @@ static int __init calibrate_APIC_clock(v return result; } -static unsigned int calibration_result; - void __init setup_boot_APIC_clock(void) { unsigned long flags; apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_save(flags); + raw_local_irq_save(flags); calibration_result = calibrate_APIC_clock(); /* * Now set up the timer for real. */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __devinit setup_secondary_APIC_clock(void) { - setup_APIC_timer(calibration_result); + setup_APIC_timer(); } void __devinit disable_APIC_timer(void) @@ -1092,6 +1126,8 @@ void enable_APIC_timer(void) } } +static DEFINE_PER_CPU(int, prof_multiplier) = 1; + /* * the frequency of the profiling timer can be changed * by writing a multiplier value into /proc/profile. @@ -1119,60 +1155,6 @@ int setup_profiling_timer(unsigned int m return 0; } - -#undef APIC_DIVISOR - -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. 
- */ - -inline void smp_local_timer_interrupt(struct pt_regs * regs) -{ - int cpu = smp_processor_id(); - - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT( - calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - -#ifdef CONFIG_SMP - update_process_times(user_mode_vm(regs)); -#endif - } - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - /* * Local APIC timer interrupt. This is the most natural way for doing * local interrupts, but local timer interrupts can be emulated by @@ -1182,7 +1164,7 @@ inline void smp_local_timer_interrupt(st * interrupt as well. Thus we cannot inline the local irq ... ] */ -fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) +fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); @@ -1191,6 +1173,8 @@ fastcall void smp_apic_timer_interrupt(s */ per_cpu(irq_stat, cpu).apic_timer_irqs++; + trace_special(regs->eip, 0, 0); + /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. @@ -1202,7 +1186,17 @@ fastcall void smp_apic_timer_interrupt(s * interrupt lock, which is the WrongThing (tm) to do. 
*/ irq_enter(); - smp_local_timer_interrupt(regs); + /* + * If the task is currently running in user mode, don't + * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not + * configured, this should be optimized out. + */ + if (user_mode(regs)) + touch_softlockup_watchdog(); + + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs); + irq_exit(); } @@ -1257,6 +1251,7 @@ fastcall void smp_error_interrupt(struct */ printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", smp_processor_id(), v , v1); + dump_stack(); irq_exit(); } Index: linux.prev/arch/i386/kernel/apm.c =================================================================== --- linux.prev.orig/arch/i386/kernel/apm.c +++ linux.prev/arch/i386/kernel/apm.c @@ -552,9 +552,9 @@ static inline void apm_restore_cpus(cpum */ #define APM_DO_CLI \ if (apm_info.allow_ints) \ - local_irq_enable(); \ + raw_local_irq_enable(); \ else \ - local_irq_disable(); + raw_local_irq_disable(); #ifdef APM_ZERO_SEGS # define APM_DECL_SEGS \ @@ -606,12 +606,12 @@ static u8 apm_bios_call(u32 func, u32 eb save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -650,12 +650,12 @@ static u8 apm_bios_call_simple(u32 func, save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -1215,7 +1215,7 @@ static int suspend(int vetoable) } device_suspend(PMSG_SUSPEND); - local_irq_disable(); + raw_local_irq_disable(); 
device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ @@ -1231,14 +1231,14 @@ static int suspend(int vetoable) */ spin_unlock(&i8253_lock); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); save_processor_state(); err = set_system_power_state(APM_STATE_SUSPEND); ignore_normal_resume = 1; restore_processor_state(); - local_irq_disable(); + raw_local_irq_disable(); write_seqlock(&xtime_lock); spin_lock(&i8253_lock); reinit_timer(); @@ -1253,7 +1253,7 @@ static int suspend(int vetoable) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(APM_NORMAL_RESUME, NULL); @@ -1272,22 +1272,22 @@ static void standby(void) { int err; - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ write_seqlock(&xtime_lock); /* If needed, notify drivers here */ get_time_diff(); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) apm_error("standby", err); - local_irq_disable(); + raw_local_irq_disable(); device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); } static apm_event_t get_event(void) Index: linux.prev/arch/i386/kernel/cpu/cpufreq/longhaul.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ linux.prev/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -144,7 +144,7 @@ static void do_powersaver(union msr_long longhaul->bits.RevisionKey = 0; preempt_disable(); - local_irq_save(flags); + raw_local_irq_save(flags); /* * get current pci bus master state for all devices @@ -166,11 +166,11 @@ static void do_powersaver(union msr_long outb(0xFE,0x21); /* TMR0 only */ outb(0xFF,0x80); /* delay */ - safe_halt(); + 
raw_safe_halt(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); halt(); - local_irq_disable(); + raw_local_irq_disable(); outb(tmp_mask,0x21); /* restore mask */ @@ -184,7 +184,7 @@ static void do_powersaver(union msr_long pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); } } while (dev != NULL); - local_irq_restore(flags); + raw_local_irq_restore(flags); preempt_enable(); /* disable bus ratio bit */ @@ -245,16 +245,16 @@ static void longhaul_setstate(unsigned i /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; bcr2.bits.CLOCKMUL = clock_ratio_index; - local_irq_disable(); + raw_local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - safe_halt(); + raw_safe_halt(); /* Disable software clock multiplier */ rdmsrl (MSR_VIA_BCR2, bcr2.val); bcr2.bits.ESOFTBF = 0; - local_irq_disable(); + raw_local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - local_irq_enable(); + raw_local_irq_enable(); break; /* Index: linux.prev/arch/i386/kernel/cpu/mtrr/cyrix.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/cyrix.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -17,7 +17,7 @@ cyrix_get_arr(unsigned int reg, unsigned arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ /* Save flags and disable interrupts */ - local_irq_save(flags); + raw_local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ /* Enable interrupts if it was enabled previously */ - local_irq_restore(flags); + raw_local_irq_restore(flags); shift = ((unsigned char *) base)[1] & 0x0f; *base >>= PAGE_SHIFT; Index: linux.prev/arch/i386/kernel/cpu/mtrr/generic.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/generic.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/generic.c @@ -234,7 +234,7 @@ static 
unsigned long set_mtrr_state(u32 static unsigned long cr4 = 0; static u32 deftype_lo, deftype_hi; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_RAW_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -296,14 +296,14 @@ static void generic_set_all(void) unsigned long mask, count; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); /* Actually set the state */ mask = set_mtrr_state(deftype_lo,deftype_hi); post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { @@ -331,7 +331,7 @@ static void generic_set_mtrr(unsigned in vr = &mtrr_state.var_ranges[reg]; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); if (size == 0) { @@ -350,7 +350,7 @@ static void generic_set_mtrr(unsigned in } post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) Index: linux.prev/arch/i386/kernel/cpu/mtrr/main.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/main.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/main.c @@ -146,7 +146,7 @@ static void ipi_handler(void *info) struct set_mtrr_data *data = info; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); atomic_dec(&data->count); while(!atomic_read(&data->gate)) @@ -164,7 +164,7 @@ static void ipi_handler(void *info) cpu_relax(); atomic_dec(&data->count); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -225,7 +225,7 @@ static void set_mtrr(unsigned int reg, u if (smp_call_function(ipi_handler, &data, 1, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); - local_irq_save(flags); + raw_local_irq_save(flags); while(atomic_read(&data.count)) cpu_relax(); @@ -259,7 +259,7 @@ static 
void set_mtrr(unsigned int reg, u while(atomic_read(&data.count)) cpu_relax(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** @@ -695,11 +695,11 @@ void mtrr_ap_init(void) * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to * prevent mtrr entry changes */ - local_irq_save(flags); + raw_local_irq_save(flags); mtrr_if->set_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init mtrr_init_finialize(void) Index: linux.prev/arch/i386/kernel/cpu/mtrr/state.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/state.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/state.c @@ -12,7 +12,7 @@ void set_mtrr_prepare_save(struct set_mt unsigned int cr0; /* Disable interrupts locally */ - local_irq_save(ctxt->flags); + raw_local_irq_save(ctxt->flags); if (use_intel() || is_cpu(CYRIX)) { @@ -73,6 +73,6 @@ void set_mtrr_done(struct set_mtrr_conte write_cr4(ctxt->cr4val); } /* Re-enable interrupts locally (if enabled previously) */ - local_irq_restore(ctxt->flags); + raw_local_irq_restore(ctxt->flags); } Index: linux.prev/arch/i386/kernel/entry.S =================================================================== --- linux.prev.orig/arch/i386/kernel/entry.S +++ linux.prev/arch/i386/kernel/entry.S @@ -76,10 +76,10 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +# define preempt_stop cli #else -#define preempt_stop -#define resume_kernel restore_nocheck +# define preempt_stop +# define resume_kernel restore_nocheck #endif #define SAVE_ALL \ @@ -160,14 +160,17 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) cli + cmpl $0, kernel_preemption + jz restore_nocheck cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? 
testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all + jz restore_nocheck + cli call preempt_schedule_irq jmp need_resched #endif @@ -200,6 +203,11 @@ sysenter_past_esp: pushl %eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -213,6 +221,11 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work +#ifdef CONFIG_LATENCY_TRACE + pushl %eax + call sys_ret + popl %eax +#endif /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx @@ -225,6 +238,11 @@ sysenter_past_esp: ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -254,6 +272,17 @@ restore_all: cmpl $((4 << 8) | 3), %eax je ldt_ss # returning to user-space with LDT SS restore_nocheck: +#if defined(CONFIG_CRITICAL_IRQSOFF_TIMING) || defined(CONFIG_LATENCY_TRACE) + pushl %eax +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + call trace_irqs_on +#endif +#ifdef CONFIG_LATENCY_TRACE + call sys_ret +#endif + popl %eax +#endif +restore_nocheck_nmi: RESTORE_REGS addl $4, %esp 1: iret @@ -297,18 +326,19 @@ ldt_ss: # perform work that needs to be done immediately before resumption ALIGN work_pending: - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jz work_notifysig work_resched: - call schedule - cli # make sure we don't miss an interrupt + cli + call __schedule + # make sure we don't miss 
an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jnz work_resched work_notifysig: # deal with pending signals and @@ -351,6 +381,11 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + pushl %eax + call trace_irqs_on + popl %eax +#endif sti # could let do_syscall_trace() call # schedule() instead movl %esp, %eax @@ -412,9 +447,16 @@ ENTRY(irq_entries_start) vector=vector+1 .endr +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define TRACE_IRQS_OFF call trace_irqs_off_lowlevel; +#else +# define TRACE_IRQS_OFF +#endif + ALIGN common_interrupt: SAVE_ALL + TRACE_IRQS_OFF movl %esp,%eax call do_IRQ jmp ret_from_intr @@ -423,6 +465,7 @@ common_interrupt: ENTRY(name) \ pushl $nr-256; \ SAVE_ALL \ + TRACE_IRQS_OFF \ movl %esp,%eax; \ call smp_/**/name; \ jmp ret_from_intr; @@ -552,7 +595,7 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nocheck_nmi nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) Index: linux.prev/arch/i386/kernel/hpet.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/hpet.c @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +#include +#include + +#define HPET_MASK 0xFFFFFFFF +#define HPET_SHIFT 22 + +/* FSEC = 10^-15 NSEC = 10^-9 */ +#define FSEC_PER_NSEC 1000000 + +static void *hpet_ptr; + +static cycle_t read_hpet(void) +{ + return (cycle_t)readl(hpet_ptr); +} + +struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .mask = (cycle_t)HPET_MASK, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, 
+ .is_continuous = 1, +}; + +static int __init init_hpet_clocksource(void) +{ + unsigned long hpet_period; + void __iomem* hpet_base; + u64 tmp; + + if (!hpet_address) + return -ENODEV; + + /* calculate the hpet address: */ + hpet_base = + (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_ptr = hpet_base + HPET_COUNTER; + + /* calculate the frequency: */ + hpet_period = readl(hpet_base + HPET_PERIOD); + + /* + * hpet period is in femto seconds per cycle + * so we need to convert this to ns/cyc units + * aproximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + register_clocksource(&clocksource_hpet); + + return 0; +} + +module_init(init_hpet_clocksource); Index: linux.prev/arch/i386/kernel/i386_ksyms.c =================================================================== --- linux.prev.orig/arch/i386/kernel/i386_ksyms.c +++ linux.prev/arch/i386/kernel/i386_ksyms.c @@ -6,10 +6,12 @@ /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); @@ -25,7 +27,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES) extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); Index: linux.prev/arch/i386/kernel/i8253.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/i8253.c @@ -0,0 +1,137 @@ +/* + * i8253.c 8253/PIT functions + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "io_ports.h" + +DEFINE_RAW_SPINLOCK(i8253_lock); +EXPORT_SYMBOL(i8253_lock); + +static void init_pit_timer(int mode) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + + if (mode != CLOCK_EVT_ONESHOT) { + /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(0x34, PIT_MODE); + udelay(10); + outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ + outb(LATCH >> 8 , PIT_CH0); /* MSB */ + } else { + /* One shot setup */ + outb_p(0x38, PIT_MODE); + udelay(10); + } + + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static void pit_next_event(unsigned long evt) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(evt & 0xff , PIT_CH0); /* LSB */ + outb(evt >> 8 , PIT_CH0); /* MSB */ + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static struct clock_event pit_clockevent = { + .name = "pit", + .capabilities = CLOCK_CAP_TICK +#ifndef CONFIG_SMP + | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_CAP_UPDATE +#endif + , + .set_mode = init_pit_timer, + .set_next_event = pit_next_event, + .start_event = io_apic_timer_ack, + .end_event = mca_timer_ack, + .shift = 32, + .irq = 0, +}; + +void setup_pit_timer(void) +{ + pit_clockevent.mult = div_sc32(CLOCK_TICK_RATE, NSEC_PER_SEC); + pit_clockevent.max_delta_ns = + 
clockevent_delta2ns(0x7FFF, &pit_clockevent); + pit_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &pit_clockevent); + setup_global_clockevent(&pit_clockevent, CPU_MASK_NONE); +} + +/* + * Since the PIT overflows every tick, its not very useful + * to just read by itself. So use jiffies to emulate a free + * running counter: + */ +static cycle_t pit_read(void) +{ + unsigned long flags, seq; + int count; + u64 jifs; + + do { + seq = read_seqbegin(&xtime_lock); + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(0x00, PIT_MODE); /* latch the count ASAP */ + count = inb_p(PIT_CH0); /* read the latched count */ + count |= inb_p(PIT_CH0) << 8; + + /* VIA686a test code... reset the latch if count > max + 1 */ + if (count > LATCH) { + outb_p(0x34, PIT_MODE); + outb_p(LATCH & 0xff, PIT_CH0); + outb(LATCH >> 8, PIT_CH0); + count = LATCH - 1; + } + spin_unlock_irqrestore(&i8253_lock, flags); + + jifs = jiffies_64; + } while (read_seqretry(&xtime_lock, seq)); + + jifs -= INITIAL_JIFFIES; + count = (LATCH-1) - count; + + return (cycle_t)(jifs * LATCH) + count; +} + +static struct clocksource clocksource_pit = { + .name = "pit", + .rating = 110, + .read = pit_read, + .mask = (cycle_t)-1, + .mult = 0, + .shift = 20, +}; + +static int __init init_pit_clocksource(void) +{ + if (num_possible_cpus() > 4) /* PIT does not scale! 
*/ + return 0; + + clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); + register_clocksource(&clocksource_pit); + + return 0; +} +module_init(init_pit_clocksource); Index: linux.prev/arch/i386/kernel/i8259.c =================================================================== --- linux.prev.orig/arch/i386/kernel/i8259.c +++ linux.prev/arch/i386/kernel/i8259.c @@ -35,7 +35,7 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -366,7 +366,7 @@ static irqreturn_t math_error_irq(int cp * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL }; void __init init_ISA_irqs (void) { @@ -422,12 +422,6 @@ void __init init_IRQ(void) intr_init_hook(); /* - * Set the clock to HZ Hz, we already have a valid - * vector now: - */ - setup_pit_timer(); - - /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
*/ Index: linux.prev/arch/i386/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/i386/kernel/init_task.c +++ linux.prev/arch/i386/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/i386/kernel/io_apic.c =================================================================== --- linux.prev.orig/arch/i386/kernel/io_apic.c +++ linux.prev/arch/i386/kernel/io_apic.c @@ -49,7 +49,7 @@ atomic_t irq_mis_count; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * Is the SiS APIC rmw bug present ? @@ -90,6 +90,27 @@ int vector_irq[NR_VECTORS] __read_mostly #define vector_to_irq(vector) (vector) #endif +static int timer_ack; + +void io_apic_timer_ack(void *priv) +{ + unsigned long flags; + + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to reset the IRR bit for do_slow_gettimeoffset(). + * This will also deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + spin_lock_irqsave(&i8259A_lock, flags); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(PIC_MASTER_POLL); + spin_unlock_irqrestore(&i8259A_lock, flags); + } +} + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. 
We are super @@ -133,6 +154,105 @@ static void __init replace_pin_at_irq(un } } +//#define IOAPIC_CACHE + +#ifdef IOAPIC_CACHE +# define MAX_IOAPIC_CACHE 512 + +/* + * Cache register values: + */ +static unsigned int io_apic_cache[MAX_IO_APICS][MAX_IOAPIC_CACHE] + ____cacheline_aligned_in_smp; +#endif + +inline unsigned int __raw_io_apic_read(unsigned int apic, unsigned int reg) +{ + *IO_APIC_BASE(apic) = reg; + return *(IO_APIC_BASE(apic)+4); +} + +unsigned int raw_io_apic_read(unsigned int apic, unsigned int reg) +{ + unsigned int val = __raw_io_apic_read(apic, reg); + +#ifdef IOAPIC_CACHE + io_apic_cache[apic][reg] = val; +#endif + return val; +} + +unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ +#ifdef IOAPIC_CACHE + if (unlikely(reg >= MAX_IOAPIC_CACHE)) { + static int once = 1; + + if (once) { + once = 0; + printk("WARNING: ioapic register cache overflow: %d.\n", + reg); + dump_stack(); + } + return __raw_io_apic_read(apic, reg); + } + if (io_apic_cache[apic][reg] && !sis_apic_bug) + return io_apic_cache[apic][reg]; +#endif + return raw_io_apic_read(apic, reg); +} + +void io_apic_write(unsigned int apic, unsigned int reg, unsigned int val) +{ +#ifdef IOAPIC_CACHE + if (unlikely(reg >= MAX_IOAPIC_CACHE)) { + static int once = 1; + + if (once) { + once = 0; + printk("WARNING: ioapic register cache overflow: %d.\n", + reg); + dump_stack(); + } + } else + io_apic_cache[apic][reg] = val; +#endif + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = val; +} + +/* + * Some systems need a POST flush or else level-triggered interrupts + * generate lots of spurious interrupts due to the POST-ed write not + * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC. + */ +#ifdef CONFIG_SMP +# define IOAPIC_POSTFLUSH +#endif + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. 
+ * + * Older SiS APIC requires we rewrite the index regiser + */ +void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val) +{ +#ifdef IOAPIC_CACHE + io_apic_cache[apic][reg] = val; +#endif + if (unlikely(sis_apic_bug)) + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = val; +#ifndef IOAPIC_POSTFLUSH + if (unlikely(sis_apic_bug)) +#endif + /* + * Force POST flush by reading: + */ + val = *(IO_APIC_BASE(apic)+4); +} + static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_pin_list *entry = irq_2_pin + irq; @@ -164,18 +284,6 @@ static void __unmask_IO_APIC_irq (unsign __modify_IO_APIC_irq(irq, 0, 0x00010000); } -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); -} - static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -1430,8 +1538,8 @@ void __init print_IO_APIC(void) struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + *(((int *)&entry)+0) = raw_io_apic_read(apic, 0x10+i*2); + *(((int *)&entry)+1) = raw_io_apic_read(apic, 0x11+i*2); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG " %02x %03X %02X ", @@ -1477,7 +1585,7 @@ void __init print_IO_APIC(void) return; } -#if 0 +#if 1 static void print_APIC_bitfield (int base) { @@ -1866,7 +1974,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1877,7 +1985,7 @@ static int __init timer_irq_works(void) * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays. 
*/ - if (jiffies - t1 > 4) + if (jiffies - t1 > 4 && jiffies - t1 < 16) return 1; return 0; @@ -1930,9 +2038,11 @@ static unsigned int startup_edge_ioapic_ static void ack_edge_ioapic_irq(unsigned int irq) { move_irq(irq); +#if 0 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); +#endif ack_APIC_irq(); } @@ -1957,6 +2067,30 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +#ifdef CONFIG_PREEMPT_HARDIRQS + +/* + * in the PREEMPT_HARDIRQS case we dont want to keep the local + * APIC unacked, because the prevents further interrupts from + * being handled - and with IRQ threads being delayed arbitrarily, + * that's unacceptable. So we first mask the IRQ, then ack it. + * The hardirq thread will then unmask it. + */ +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +#else + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + +#endif + static void end_level_ioapic_irq (unsigned int irq) { unsigned long v; @@ -1991,8 +2125,10 @@ static void end_level_ioapic_irq (unsign if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + /* mask = 1, trigger = 0 */ + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + /* mask = 0, trigger = 1 */ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); spin_unlock(&ioapic_lock); } } @@ -2020,6 +2156,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); Index: linux.prev/arch/i386/kernel/irq.c =================================================================== --- 
linux.prev.orig/arch/i386/kernel/irq.c +++ linux.prev/arch/i386/kernel/irq.c @@ -51,7 +51,7 @@ static union irq_ctx *softirq_ctx[NR_CPU * SMP cross-CPU interrupts have their own specific * handlers). */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) +fastcall notrace unsigned int do_IRQ(struct pt_regs *regs) { /* high bits used in ret_from_ code */ int irq = regs->orig_eax & 0xff; @@ -59,8 +59,12 @@ fastcall unsigned int do_IRQ(struct pt_r union irq_ctx *curctx, *irqctx; u32 *isp; #endif - irq_enter(); +#ifdef CONFIG_LATENCY_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->eip, irq, 0); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { @@ -69,7 +73,7 @@ fastcall unsigned int do_IRQ(struct pt_r __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", + printk("BUG: do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); } @@ -173,7 +177,7 @@ asmlinkage void do_softirq(void) if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); if (local_softirq_pending()) { curctx = current_thread_info(); @@ -194,7 +198,7 @@ asmlinkage void do_softirq(void) ); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); @@ -224,8 +228,10 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; + irq_desc_t *desc = irq_desc + i; + + spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto skip; seq_printf(p, "%3d: ",i); @@ -235,15 +241,27 @@ int show_interrupts(struct seq_file *p, for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %-14s", desc->handler->typename); 
+#define F(x,c) ((desc->status & x) ? c : '.') + seq_printf(p, " [%c%c%c%c%c%c%c%c%c/", + F(IRQ_INPROGRESS, 'I'), + F(IRQ_DISABLED, 'D'), + F(IRQ_PENDING, 'P'), + F(IRQ_REPLAY, 'R'), + F(IRQ_AUTODETECT, 'A'), + F(IRQ_WAITING, 'W'), + F(IRQ_LEVEL, 'L'), + F(IRQ_MASKED, 'M'), + F(IRQ_NODELAY, 'N')); +#undef F + seq_printf(p, "%3d]", desc->irqs_unhandled); seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -298,9 +316,9 @@ void fixup_irqs(cpumask_t map) barrier(); #else /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); #endif } #endif Index: linux.prev/arch/i386/kernel/mca.c =================================================================== --- linux.prev.orig/arch/i386/kernel/mca.c +++ linux.prev/arch/i386/kernel/mca.c @@ -472,3 +472,22 @@ void mca_handle_nmi(void) mca_nmi_hook(); } /* mca_handle_nmi */ + +void mca_timer_ack(void *priv) +{ + int irq; + + if (MCA_bus) { + /* The PS/2 uses level-triggered interrupts. You can't + turn them off, nor would you want to (any attempt to + enable edge-triggered interrupts usually gets intercepted by a + special hardware circuit). Hence we have to acknowledge + the timer interrupt. Through some incredibly stupid + design idea, the reset for IRQ 0 is done by setting the + high bit of the PPI port B (0x61). Note that some PS/2s, + notably the 55SX, work fine if this is removed. 
*/ + + irq = inb_p( 0x61 ); /* read the current state */ + outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ + } +} Index: linux.prev/arch/i386/kernel/mcount-wrapper.S =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/mcount-wrapper.S @@ -0,0 +1,27 @@ +/* + * linux/arch/i386/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret + Index: linux.prev/arch/i386/kernel/microcode.c =================================================================== --- linux.prev.orig/arch/i386/kernel/microcode.c +++ linux.prev/arch/i386/kernel/microcode.c @@ -109,7 +109,7 @@ MODULE_LICENSE("GPL"); #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) /* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); +static DEFINE_RAW_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DECLARE_MUTEX(microcode_sem); Index: linux.prev/arch/i386/kernel/nmi.c =================================================================== --- linux.prev.orig/arch/i386/kernel/nmi.c +++ linux.prev/arch/i386/kernel/nmi.c @@ -34,7 +34,7 @@ unsigned int nmi_watchdog = NMI_NONE; extern int unknown_nmi_panic; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); @@ -108,7 +108,7 @@ int nmi_active; static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + raw_local_irq_enable(); /* Intentionally don't use cpu_relax here. 
This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -140,8 +140,8 @@ static int __init check_nmi_watchdog(voi for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; - local_irq_enable(); - mdelay((10*1000)/nmi_hz); // wait 10 ticks + raw_local_irq_enable(); + mdelay((100*1000)/nmi_hz); // wait 100 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_SMP @@ -168,7 +168,7 @@ static int __init check_nmi_watchdog(voi /* now that we know it works we can reduce NMI frequency to something more reasonable; makes a difference in some configs */ if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = 1; + nmi_hz = 10000; kfree(prev_nmi_count); return 0; @@ -521,9 +521,34 @@ void touch_nmi_watchdog (void) extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs) +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) { + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + printk("nmi_show_all_regs(): start on CPU#%d.\n", + raw_smp_processor_id()); + dump_stack(); + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} + +static DEFINE_RAW_SPINLOCK(nmi_print_lock); +void notrace nmi_watchdog_tick (struct pt_regs * regs) +{ /* * Since current_thread_info()-> is always on the stack, and we * always switch the stack NMI-atomically, it's safe to use @@ -531,7 +556,16 @@ void nmi_watchdog_tick (struct pt_regs * */ int sum, cpu = smp_processor_id(); - sum = per_cpu(irq_stat, cpu).apic_timer_irqs; + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); + + profile_tick(CPU_PROFILING, regs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk("NMI show regs on CPU#%d:\n", cpu); + 
show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (last_irq_sums[cpu] == sum) { /* @@ -539,12 +573,25 @@ void nmi_watchdog_tick (struct pt_regs * * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "NMI Watchdog detected LOCKUP"); + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + bust_spinlocks(1); + spin_lock(&nmi_print_lock); + printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) + if (i != cpu) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } + } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } Index: linux.prev/arch/i386/kernel/process.c =================================================================== --- linux.prev.orig/arch/i386/kernel/process.c +++ linux.prev/arch/i386/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,12 @@ static int hlt_counter; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_SPINLOCK(pm_idle_switch_lock); +EXPORT_SYMBOL_GPL(pm_idle_switch_lock); + +int pm_idle_locked = 0; +EXPORT_SYMBOL_GPL(pm_idle_locked); + /* * Return saved PC of a blocked thread. 
*/ @@ -99,21 +106,21 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); if (!hlt_counter && boot_cpu_data.hlt_works_ok) { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + while (!need_resched() && !need_resched_delayed()) { + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); + raw_local_irq_enable(); } set_thread_flag(TIF_POLLING_NRFLAG); } else { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) cpu_relax(); } } @@ -126,16 +133,17 @@ EXPORT_SYMBOL(default_idle); * to poll the ->work.need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +void poll_idle (void) { - local_irq_enable(); + raw_local_irq_enable(); asm volatile( "2:" "testl %0, %1;" "rep; nop;" "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + : : "i"(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED), + "m" (current_thread_info()->flags)); } #ifdef CONFIG_HOTPLUG_CPU @@ -153,7 +161,7 @@ static inline void play_dead(void) /* * With physical CPU hotplug, we should halt the cpu */ - local_irq_disable(); + raw_local_irq_disable(); while (1) halt(); } @@ -178,7 +186,9 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -196,9 +206,11 @@ void cpu_idle(void) __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } } @@ -239,12 +251,12 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ static 
void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { __monitor((void *)&current_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); } @@ -372,11 +384,16 @@ void exit_thread(void) /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + int cpu; + struct tss_struct *tss; + void *io_bitmap_ptr = t->io_bitmap_ptr; - kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + mb(); + kfree(io_bitmap_ptr); + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); /* * Careful, clear this in the TSS too: */ Index: linux.prev/arch/i386/kernel/reboot.c =================================================================== --- linux.prev.orig/arch/i386/kernel/reboot.c +++ linux.prev/arch/i386/kernel/reboot.c @@ -202,7 +202,7 @@ void machine_real_restart(unsigned char { unsigned long flags; - local_irq_disable(); + raw_local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST routine will recognize as telling it to do a proper reboot.
(Well Index: linux.prev/arch/i386/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/i386/kernel/semaphore.c +++ linux.prev/arch/i386/kernel/semaphore.c @@ -13,6 +13,7 @@ * rw semaphores implemented November 1999 by Benjamin LaHaise */ #include +#include #include /* @@ -28,15 +29,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" +".globl __compat_down_failed\n" +"__compat_down_failed:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down\n\t" + "call __compat_down\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -49,15 +50,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" +".globl __compat_down_failed_interruptible\n" +"__compat_down_failed_interruptible:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_interruptible\n\t" + "call __compat_down_interruptible\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -70,15 +71,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_trylock\n" -"__down_failed_trylock:\n\t" +".globl __compat_down_failed_trylock\n" +"__compat_down_failed_trylock:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_trylock\n\t" + "call __compat_down_trylock\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -91,45 +92,13 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" +".globl __compat_up_wakeup\n" +"__compat_up_wakeup:\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" - "call __up\n\t" + "call __compat_up\n\t" "popl %ecx\n\t" "popl %edx\n\t" "ret" ); -/* - * rw spinlock fallbacks - */ -#if 
defined(CONFIG_SMP) -asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif Index: linux.prev/arch/i386/kernel/setup.c =================================================================== --- linux.prev.orig/arch/i386/kernel/setup.c +++ linux.prev/arch/i386/kernel/setup.c @@ -1620,6 +1620,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + tsc_init(); } #include "setup_arch_post.h" Index: linux.prev/arch/i386/kernel/signal.c =================================================================== --- linux.prev.orig/arch/i386/kernel/signal.c +++ linux.prev/arch/i386/kernel/signal.c @@ -604,6 +604,13 @@ int fastcall do_signal(struct pt_regs *r int signr; struct k_sigaction ka; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux.prev/arch/i386/kernel/smp.c =================================================================== --- linux.prev.orig/arch/i386/kernel/smp.c +++ linux.prev/arch/i386/kernel/smp.c @@ -163,7 +163,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu unsigned long cfg; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. 
@@ -186,7 +186,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu */ apic_write_around(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void send_IPI_mask_sequence(cpumask_t mask, int vector) @@ -200,7 +200,7 @@ void send_IPI_mask_sequence(cpumask_t ma * should be modified to do 1 message per cluster ID - mbligh */ - local_irq_save(flags); + raw_local_irq_save(flags); for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { if (cpu_isset(query_cpu, mask)) { @@ -227,7 +227,7 @@ void send_IPI_mask_sequence(cpumask_t ma apic_write_around(APIC_ICR, cfg); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } #include /* must come after the send_IPI functions above for inlining */ @@ -245,7 +245,7 @@ void send_IPI_mask_sequence(cpumask_t ma static cpumask_t flush_cpumask; static struct mm_struct * flush_mm; static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +static DEFINE_RAW_SPINLOCK(tlbstate_lock); #define FLUSH_ALL 0xffffffff /* @@ -390,7 +390,7 @@ static void flush_tlb_others(cpumask_t c while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ - mb(); + cpu_relax(); flush_mm = NULL; flush_va = 0; @@ -481,10 +481,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. 
*/ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -538,7 +548,7 @@ int smp_call_function (void (*func) (voi } /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); data.func = func; data.info = info; @@ -572,7 +582,7 @@ static void stop_this_cpu (void * dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) for(;;) halt(); @@ -587,19 +597,20 @@ void smp_send_stop(void) { smp_call_function(stop_this_cpu, NULL, 1, 0); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. Trigger a reschedule pass so that + * RT-overload balancing can pass tasks around. */ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) +fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs) { + trace_special(regs->eip, 0, 0); ack_APIC_irq(); + set_tsk_need_resched(current); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) Index: linux.prev/arch/i386/kernel/smpboot.c =================================================================== --- linux.prev.orig/arch/i386/kernel/smpboot.c +++ linux.prev/arch/i386/kernel/smpboot.c @@ -212,142 +212,299 @@ valid_k7: ; } -/* - * TSC synchronization. - * - * We first check whether all CPUs have their TSC's synchronized, - * then we print a warning if not, and always resync. 
- */ +static atomic_t tsc_start_flag, tsc_check_start, tsc_check_stop; -static atomic_t tsc_start_flag = ATOMIC_INIT(0); -static atomic_t tsc_count_start = ATOMIC_INIT(0); -static atomic_t tsc_count_stop = ATOMIC_INIT(0); -static unsigned long long tsc_values[NR_CPUS]; - -#define NR_LOOPS 5 - -static void __init synchronize_tsc_bp (void) +static int __init check_tsc_warp(void) { - int i; - unsigned long long t0; - unsigned long long sum, avg; - long long delta; - unsigned int one_usec; - int buggy = 0; - - printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); - - /* convert from kcyc/sec to cyc/usec */ - one_usec = cpu_khz / 1000; + static DEFINE_RAW_SPINLOCK(warp_lock); + static long long prev; + static unsigned int error; - atomic_set(&tsc_start_flag, 1); - wmb(); + int cpus = num_booting_cpus(), nr = 0; + long long start, now, end, delta; + atomic_inc(&tsc_check_start); + while (atomic_read(&tsc_check_start) != cpus) + cpu_relax(); /* - * We loop a few times to get a primed instruction cache, - * then the last pass is more or less synchronized and - * the BP and APs set their cycle counters to zero all at - * once. This reduces the chance of having random offsets - * between the processors, and guarantees that the maximum - * delay between the cycle counters is never bigger than - * the latency of information-passing (cachelines) between - * two CPUs. 
+ * Run the check for 500 msecs: */ - for (i = 0; i < NR_LOOPS; i++) { - /* - * all APs synchronize but they loop on '== num_cpus' - */ - while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_stop, 0); - wmb(); - /* - * this lets the APs save their current TSC: - */ - atomic_inc(&tsc_count_start); + rdtscll(start); + end = start + cpu_khz*500; - rdtscll(tsc_values[smp_processor_id()]); + for (;;) { /* - * We clear the TSC in the last loop: + * Check for the TSC going backwards (between CPUs): */ - if (i == NR_LOOPS-1) - write_tsc(0, 0); + spin_lock(&warp_lock); + rdtscll(now); + delta = now - prev; + prev = now; + spin_unlock(&warp_lock); + if (unlikely(delta < 0)) + error = 1; + if (now > end) + break; /* - * Wait for all APs to leave the synchronization point: + * Take it easy every couple of iterations, + * to not starve other CPUs: */ - while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_start, 0); - wmb(); - atomic_inc(&tsc_count_stop); + nr++; + if (!(nr % 31)) + cpu_relax(); } - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, cpu_callout_map)) { - t0 = tsc_values[i]; - sum += t0; - } - } - avg = sum; - do_div(avg, num_booting_cpus()); + atomic_inc(&tsc_check_stop); + while (atomic_read(&tsc_check_stop) != cpus) + cpu_relax(); - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - delta = tsc_values[i] - avg; - if (delta < 0) - delta = -delta; - /* - * We report bigger than 2 microseconds clock differences. - */ - if (delta > 2*one_usec) { - long realdelta; - if (!buggy) { - buggy = 1; - printk("\n"); - } - realdelta = delta; - do_div(realdelta, one_usec); - if (tsc_values[i] < avg) - realdelta = -realdelta; + return error; +} - printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); - } +/* + * TSC synchronization based on ia64 itc synchronization code. 
Synchronize + * pairs of processors rather than trying to synchronize all of the processors + * with a single event. When several processors are all waiting for an + * event they don't all see it at the same time. The write will cause + * an invalidate on each processor's cache and then they all scramble to + * re-read that cache line. + * + * Writing the TSC resets the upper 32-bits, so we need to be careful + * that all of the cpus can be synchronized before we overflow the + * 32-bit count. + */ - sum += delta; +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/sizeof(long)) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static volatile unsigned long go[2*SLAVE] __cacheline_aligned; +static volatile int current_slave = -1; +static volatile int tsc_sync_complete = 0; +static volatile int tsc_adj_latency = 0; +static unsigned int max_rt = 0; +static unsigned int max_delta = 0; + +#define DEBUG_TSC_SYNC 0 +#if DEBUG_TSC_SYNC +struct tsc_sync_debug { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of tsc adjustment latency */ +} tsc_sync_debug[NUM_ROUNDS*NR_CPUS]; +#endif + +void +sync_master(void) +{ + unsigned long n, tsc, last_go_master; + + last_go_master = 0; + while (1) { + while ((n = go[MASTER]) == last_go_master) + rep_nop(); + if (n == ~0) + break; + rdtscl(tsc); + if (unlikely(!tsc)) + tsc = 1; + go[SLAVE] = tsc; + last_go_master = n; } - if (!buggy) - printk("passed.\n"); } -static void __init synchronize_tsc_ap (void) +/* + * Return the number of cycles by which our TSC differs from the TSC on + * the master (time-keeper) CPU. A positive number indicates our TSC is + * ahead of the master, negative that it is behind.
+ */ +static inline long +get_delta (long *rt, long *master) { - int i; + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm, last_go_slave; + long i; + + last_go_slave = go[SLAVE]; + for (i = 0; i < NUM_ITERS; ++i) { + rdtscl(t0); + go[MASTER] = i+1; + while ((tm = go[SLAVE]) == last_go_slave) + rep_nop(); + rdtscl(t1); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + last_go_slave = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + ++tcenter; + return tcenter - best_tm; +} + +/* + * Synchronize TSC of the current (slave) CPU with the TSC of the MASTER CPU + * (normally the time-keeper CPU). We use a closed loop to eliminate the + * possibility of unaccounted-for errors (such as getting a machine check in + * the middle of a calibration step). The basic idea is for the slave to ask + * the master what TSC value it has and to read its own TSC before and after + * the master responds. Each iteration gives us three + * timestamps: + * + * slave master + * + * t0 ---\ + * ---\ + * ---> + * tm + * /--- + * /--- + * t1 <--- + * + * + * The goal is to adjust the slave's TSC such that tm falls exactly half-way + * between t0 and t1. If we achieve this, the clocks are synchronized provided + * the interconnect between the slave and the master is symmetric. Even if the + * interconnect were asymmetric, we would still know that the synchronization + * error is smaller than the roundtrip latency (t1 - t0). + * + * When the interconnect is quiet and symmetric, this lets us synchronize the + * TSC to within one or two cycles. However, we can only *guarantee* that the + * synchronization is accurate to within a round-trip time, which is typically + * in the range of several hundred cycles (e.g., ~500 cycles).
In practice, + * this means that the TSC's are usually almost perfectly synchronized, but we + * shouldn't assume that the accuracy is much better than half a micro second + * or so. + */ + +static void __init +synchronize_tsc_ap (void) +{ + long i, delta, adj, adjust_latency, n_rounds; + unsigned long rt, master_time_stamp, tsc; +#if DEBUG_TSC_SYNC + struct tsc_sync_debug *t = + &tsc_sync_debug[smp_processor_id() * NUM_ROUNDS]; +#endif + + while (!atomic_read(&tsc_start_flag)) + mb(); + + if (!check_tsc_warp()) + return; /* - * Not every cpu is online at the time - * this gets called, so we first wait for the BP to - * finish SMP initialization: + * Wait for our turn to synchronize with the boot processor. */ - while (!atomic_read(&tsc_start_flag)) mb(); + while (current_slave != smp_processor_id()) + rep_nop(); + adjust_latency = tsc_adj_latency; + + go[SLAVE] = 0; + go[MASTER] = 0; + write_tsc(0,0); + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) + break; + + if (i > 0) + adjust_latency += -delta; + adj = -delta + adjust_latency/8; + rdtscl(tsc); + write_tsc(tsc + adj, 0); +#if DEBUG_TSC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/8; +#endif + } + n_rounds = i; + go[MASTER] = ~0; + +#if (DEBUG_TSC_SYNC == 2) + for (i = 0; i < n_rounds; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); + + printk("CPU %d: synchronized TSC (last diff %ld cycles, maxerr %lu cycles)\n", + smp_processor_id(), delta, rt); + + printk("It took %ld rounds\n", n_rounds); +#endif + if (rt > max_rt) + max_rt = rt; + if (delta < 0) + delta = -delta; + if (delta > max_delta) + max_delta = delta; + tsc_adj_latency = adjust_latency; + current_slave = -1; + while (!tsc_sync_complete) + rep_nop(); +} + +/* + * The boot processor set its own TSC to zero and then gives each + * slave processor the chance to synchronize itself. 
+ */ - for (i = 0; i < NR_LOOPS; i++) { - atomic_inc(&tsc_count_start); - while (atomic_read(&tsc_count_start) != num_booting_cpus()) - mb(); +static void __init synchronize_tsc_bp (void) +{ + unsigned int tsc_low, tsc_high, error; + int cpu; + + atomic_set(&tsc_start_flag, 1); - rdtscll(tsc_values[smp_processor_id()]); - if (i == NR_LOOPS-1) - write_tsc(0, 0); + printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", + num_booting_cpus()); - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); + if (!check_tsc_warp()) { + printk("passed.\n"); + return; + } + printk("failed.\n"); + + printk(KERN_INFO "starting TSC synchronization\n"); + write_tsc(0, 0); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + if (cpu == smp_processor_id()) + continue; + go[MASTER] = 0; + current_slave = cpu; + sync_master(); + while (current_slave != -1) + rep_nop(); + } + rdtsc(tsc_low, tsc_high); + if (tsc_high) + printk("TSC overflowed during synchronization\n"); + else + printk("TSC synchronization complete max_delta=%d cycles\n", + max_delta); + if (max_rt < 4293) { + error = (max_rt * 1000000)/cpu_khz; + printk("TSC sync round-trip time %d.%03d microseconds\n", + error/1000, error%1000); + } else { + printk("TSC sync round-trip time %d cycles\n", max_rt); } + tsc_sync_complete = 1; } -#undef NR_LOOPS extern void calibrate_delay(void); @@ -547,7 +704,7 @@ static void __devinit start_secondary(vo per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". 
*/ - local_irq_enable(); + raw_local_irq_enable(); wmb(); cpu_idle(); @@ -1340,9 +1497,9 @@ int __cpu_disable(void) clear_local_APIC(); /* Allow any queued timer interrupts to get serviced */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); remove_siblinginfo(cpu); @@ -1386,11 +1543,11 @@ int __devinit __cpu_up(unsigned int cpu) /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); - local_irq_enable(); + raw_local_irq_enable(); return -EIO; } - local_irq_enable(); + raw_local_irq_enable(); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); Index: linux.prev/arch/i386/kernel/switch2poll.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/switch2poll.c @@ -0,0 +1,5 @@ +/* + * Same type of hack used for early_printk. This keeps the code + * in one place. + */ +#include "../../x86_64/kernel/switch2poll.c" Index: linux.prev/arch/i386/kernel/time.c =================================================================== --- linux.prev.orig/arch/i386/kernel/time.c +++ linux.prev/arch/i386/kernel/time.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include #include "mach_time.h" @@ -79,16 +81,9 @@ EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -#include - -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); - -struct timer_opts *cur_timer __read_mostly = &timer_none; - /* * This is a special lock that is owned by the CPU and holds the index * register we are working with. 
It is required for NMI access to the @@ -118,118 +113,25 @@ void rtc_cmos_write(unsigned char val, u } EXPORT_SYMBOL(rtc_cmos_write); -/* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq; - unsigned long usec, sec; - unsigned long max_ntp_tick; - - do { - unsigned long lost; - - seq = read_seqbegin(&xtime_lock); - - usec = cur_timer->get_offset(); - lost = jiffies - wall_jiffies; - - /* - * If time_adjust is negative then NTP is slowing the clock - * so make sure not to go into next possible interval. - * Better to lose some accuracy than have time go backwards.. - */ - if (unlikely(time_adjust < 0)) { - max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; - usec = min(usec, max_ntp_tick); - - if (lost) - usec += lost * max_ntp_tick; - } - else if (unlikely(lost)) - usec += lost * (USEC_PER_SEC / HZ); - - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / 1000); - } while (read_seqretry(&xtime_lock, seq)); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - /* - * This is revolting. We need to set "xtime" correctly. However, the - * value in this location is the value at the most recent update of - * wall time. Discover what correction gettimeofday() would have - * made, and then undo it! 
- */ - nsec -= cur_timer->get_offset() * NSEC_PER_USEC; - nsec -= (jiffies - wall_jiffies) * TICK_NSEC; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - static int set_rtc_mmss(unsigned long nowtime) { int retval; - - WARN_ON(irqs_disabled()); + unsigned long flags; /* gets recalled with irq locally disabled */ - spin_lock_irq(&rtc_lock); + /* XXX - does irqsave resolve this? -johnstul */ + spin_lock_irqsave(&rtc_lock, flags); if (efi_enabled) retval = efi_set_rtc_mmss(nowtime); else retval = mach_set_rtc_mmss(nowtime); - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return retval; } - -int timer_ack; - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. - */ -unsigned long long monotonic_clock(void) -{ - return cur_timer->monotonic_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -241,70 +143,6 @@ unsigned long profile_pc(struct pt_regs EXPORT_SYMBOL(profile_pc); #endif -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick - */ -static inline void do_timer_interrupt(int irq, struct pt_regs *regs) -{ -#ifdef CONFIG_X86_IO_APIC - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to reset the IRR bit for do_slow_gettimeoffset(). 
- * This will also deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. */ - inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); - } -#endif - - do_timer_interrupt_hook(regs); - - - if (MCA_bus) { - /* The PS/2 uses level-triggered interrupts. You can't - turn them off, nor would you want to (any attempt to - enable edge-triggered interrupts usually gets intercepted by a - special hardware circuit). Hence we have to acknowledge - the timer interrupt. Through some incredibly stupid - design idea, the reset for IRQ 0 is done by setting the - high bit of the PPI port B (0x61). Note that some PS/2s, - notably the 55SX, work fine if this is removed. */ - - irq = inb_p( 0x61 ); /* read the current state */ - outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ - } -} - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. 
-arca - */ - write_seqlock(&xtime_lock); - - cur_timer->mark_offset(); - - do_timer_interrupt(irq, regs); - - write_sequnlock(&xtime_lock); - return IRQ_HANDLED; -} - /* not static: needed by APM */ unsigned long get_cmos_time(void) { @@ -323,139 +161,42 @@ unsigned long get_cmos_time(void) } EXPORT_SYMBOL(get_cmos_time); -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); - -static void sync_cmos_clock(unsigned long dummy) +/* arch specific timeofday hooks */ +nsec_t read_persistent_clock(void) { - struct timeval now, next; - int fail = 1; + return (nsec_t)get_cmos_time() * NSEC_PER_SEC; +} +void sync_persistent_clock(struct timespec ts) +{ + static unsigned long last_rtc_update; /* * If we have an externally synchronized Linux clock, then update * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). 
- */ + if (ts.tv_sec <= last_rtc_update + 660) return; - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; + if((ts.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && + (ts.tv_nsec / 1000) <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) { + /* horrible...FIXME */ + if (set_rtc_mmss(ts.tv_sec) == 0) + last_rtc_update = ts.tv_sec; + else + last_rtc_update = ts.tv_sec - 600; /* do it again in 60 s */ } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) -{ - mod_timer(&sync_cmos_timer, jiffies + 1); -} - -static long clock_cmos_diff, sleep_start; - -static struct timer_opts *last_timer; -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); - last_timer = cur_timer; - cur_timer = &timer_none; - if (last_timer->suspend) - last_timer->suspend(state); - return 0; -} - -static int timer_resume(struct sys_device *dev) -{ - unsigned long flags; - unsigned long sec; - unsigned long sleep_length; - -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) - hpet_reenable(); -#endif - setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; - write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length; - wall_jiffies += sleep_length; - if (last_timer->resume) - last_timer->resume(); - 
cur_timer = last_timer; - last_timer = NULL; - touch_softlockup_watchdog(); - return 0; } -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - - -/* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); - #ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); } - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); } @@ -463,6 +204,9 @@ static void __init hpet_time_init(void) void __init time_init(void) { + /* Set the clock to HZ Hz: */ + setup_pit_timer(); + #ifdef CONFIG_HPET_TIMER if (is_hpet_capable()) { /* @@ -473,13 +217,5 @@ void __init time_init(void) return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); - time_init_hook(); } Index: linux.prev/arch/i386/kernel/time_hpet.c =================================================================== --- linux.prev.orig/arch/i386/kernel/time_hpet.c +++ linux.prev/arch/i386/kernel/time_hpet.c @@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup); #include #include 
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 @@ -303,12 +301,12 @@ int hpet_rtc_timer_init(void) else hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - local_irq_save(flags); + raw_local_irq_save(flags); cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); + raw_local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; Index: linux.prev/arch/i386/kernel/timers/Makefile =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for x86 timers -# - -obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o - -obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o -obj-$(CONFIG_HPET_TIMER) += timer_hpet.o -obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o Index: linux.prev/arch/i386/kernel/timers/common.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/common.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Common functions used across the timers go here - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mach_timer.h" - -/* ------ Calibrate the TSC ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). - * Too much 64-bit arithmetic here to do this cleanly in C, and for - * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) - * output busy loop as low as possible. We avoid reading the CTC registers - * directly because of the awkward 8-bit access mechanism of the 82C54 - * device. 
- */ - -#define CALIBRATE_TIME (5 * 1000020/HZ) - -unsigned long calibrate_tsc(void) -{ - mach_prepare_counter(); - - { - unsigned long startlow, starthigh; - unsigned long endlow, endhigh; - unsigned long count; - - rdtsc(startlow,starthigh); - mach_countup(&count); - rdtsc(endlow,endhigh); - - - /* Error: ECTCNEVERSET */ - if (count <= 1) - goto bad_ctc; - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (endlow), "=d" (endhigh) - :"g" (startlow), "g" (starthigh), - "0" (endlow), "1" (endhigh)); - - /* Error: ECPUTOOFAST */ - if (endhigh) - goto bad_ctc; - - /* Error: ECPUTOOSLOW */ - if (endlow <= CALIBRATE_TIME) - goto bad_ctc; - - __asm__("divl %2" - :"=a" (endlow), "=d" (endhigh) - :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); - - return endlow; - } - - /* - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ -bad_ctc: - return 0; -} - -#ifdef CONFIG_HPET_TIMER -/* ------ Calibrate the TSC using HPET ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq. - * Second output is parameter 1 (when non NULL) - * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet(). - * calibrate_tsc() calibrates the processor TSC by comparing - * it to the HPET timer of known frequency. 
- * Too much 64-bit arithmetic here to do this cleanly in C - */ -#define CALIBRATE_CNT_HPET (5 * hpet_tick) -#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) - -unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) -{ - unsigned long tsc_startlow, tsc_starthigh; - unsigned long tsc_endlow, tsc_endhigh; - unsigned long hpet_start, hpet_end; - unsigned long result, remain; - - hpet_start = hpet_readl(HPET_COUNTER); - rdtsc(tsc_startlow, tsc_starthigh); - do { - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET); - rdtsc(tsc_endlow, tsc_endhigh); - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (tsc_endlow), "=d" (tsc_endhigh) - :"g" (tsc_startlow), "g" (tsc_starthigh), - "0" (tsc_endlow), "1" (tsc_endhigh)); - - /* Error: ECPUTOOFAST */ - if (tsc_endhigh) - goto bad_calibration; - - /* Error: ECPUTOOSLOW */ - if (tsc_endlow <= CALIBRATE_TIME_HPET) - goto bad_calibration; - - ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET); - if (remain > (tsc_endlow >> 1)) - result++; /* rounding the result */ - - if (tsc_hpet_quotient_ptr) { - unsigned long tsc_hpet_quotient; - - ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0, - CALIBRATE_CNT_HPET); - if (remain > (tsc_endlow >> 1)) - tsc_hpet_quotient++; /* rounding the result */ - *tsc_hpet_quotient_ptr = tsc_hpet_quotient; - } - - return result; -bad_calibration: - /* - * the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ - return 0; -} -#endif - - -unsigned long read_timer_tsc(void) -{ - unsigned long retval; - rdtscl(retval); - return retval; -} - - -/* calculate cpu_khz */ -void init_cpu_khz(void) -{ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. 
- */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - } - } -} - Index: linux.prev/arch/i386/kernel/timers/timer.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include - -#ifdef CONFIG_HPET_TIMER -/* - * HPET memory read is slower than tsc reads, but is more dependable as it - * always runs at constant frequency and reduces complexity due to - * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use - * timer_pit when HPET is active. So, we default to timer_tsc. - */ -#endif -/* list of timers, ordered by preference, NULL terminated */ -static struct init_timer_opts* __initdata timers[] = { -#ifdef CONFIG_X86_CYCLONE_TIMER - &timer_cyclone_init, -#endif -#ifdef CONFIG_HPET_TIMER - &timer_hpet_init, -#endif -#ifdef CONFIG_X86_PM_TIMER - &timer_pmtmr_init, -#endif - &timer_tsc_init, - &timer_pit_init, - NULL, -}; - -static char clock_override[10] __initdata; - -static int __init clock_setup(char* str) -{ - if (str) - strlcpy(clock_override, str, sizeof(clock_override)); - return 1; -} -__setup("clock=", clock_setup); - - -/* The chosen timesource has been found to be bad. - * Fall back to a known good timesource (the PIT) - */ -void clock_fallback(void) -{ - cur_timer = &timer_pit; -} - -/* iterates through the list of timers, returning the first - * one that initializes successfully. 
- */ -struct timer_opts* __init select_timer(void) -{ - int i = 0; - - /* find most preferred working timer */ - while (timers[i]) { - if (timers[i]->init) - if (timers[i]->init(clock_override) == 0) - return timers[i]->opts; - ++i; - } - - panic("select_timer: Cannot find a suitable timer\n"); - return NULL; -} - -int read_current_timer(unsigned long *timer_val) -{ - if (cur_timer->read_timer) { - *timer_val = cur_timer->read_timer(); - return 0; - } - return -1; -} Index: linux.prev/arch/i386/kernel/timers/timer_cyclone.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_cyclone.c +++ /dev/null @@ -1,259 +0,0 @@ -/* Cyclone-timer: - * This code implements timer_ops for the cyclone counter found - * on IBM x440, x360, and other Summit based systems. - * - * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com) - */ - - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "io_ports.h" - -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -#define CYCLONE_CBAR_ADDR 0xFEB00CD0 -#define CYCLONE_PMCC_OFFSET 0x51A0 -#define CYCLONE_MPMC_OFFSET 0x51D0 -#define CYCLONE_MPCS_OFFSET 0x51A8 -#define CYCLONE_TIMER_FREQ 100000000 -#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */ -int use_cyclone = 0; - -static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ -static u32 last_cyclone_low; -static u32 last_cyclone_high; -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* helper macro to atomically read both cyclone counter registers */ -#define read_cyclone_counter(low,high) \ - do{ \ - high = cyclone_timer[1]; low = cyclone_timer[0]; \ - } while (high != cyclone_timer[1]); - - -static void mark_offset_cyclone(void) -{ - unsigned long lost, delay; - unsigned long delta = last_cyclone_low; - int count; - unsigned long long 
this_offset, last_offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - - spin_lock(&i8253_lock); - read_cyclone_counter(last_cyclone_low,last_cyclone_high); - - /* read values for delay_at_last_interrupt */ - outb_p(0x00, 0x43); /* latch the count ASAP */ - - count = inb_p(0x40); /* read the latched count */ - count |= inb(0x40) << 8; - - /* - * VIA686a test code... reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - spin_unlock(&i8253_lock); - - /* lost tick compensation */ - delta = last_cyclone_low - delta; - delta /= (CYCLONE_TIMER_FREQ/1000000); - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) - jiffies_64 += lost-1; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - - /* catch corner case where tick rollover occured - * between cyclone and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static unsigned long get_offset_cyclone(void) -{ - u32 offset; - - if(!cyclone_timer) - return delay_at_last_interrupt; - - /* Read the cyclone timer */ - offset = cyclone_timer[0]; - - /* .. relative to previous jiffy */ - offset = offset - last_cyclone_low; - - /* convert cyclone ticks to microseconds */ - /* XXX slow, can we speed this up? 
*/ - offset = offset/(CYCLONE_TIMER_FREQ/1000000); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + offset; -} - -static unsigned long long monotonic_clock_cyclone(void) -{ - u32 now_low, now_high; - unsigned long long last_offset, this_offset, base; - unsigned long long ret; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - - /* Read the cyclone counter */ - read_cyclone_counter(now_low,now_high); - this_offset = ((unsigned long long)now_high<<32)|now_low; - - /* convert to nanoseconds */ - ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK); - return ret * (1000000000 / CYCLONE_TIMER_FREQ); -} - -static int __init init_cyclone(char* override) -{ - u32* reg; - u32 base; /* saved cyclone base address */ - u32 pageaddr; /* page that contains cyclone_timer register */ - u32 offset; /* offset from pageaddr to cyclone_timer register */ - int i; - - /* check clock override */ - if (override[0] && strncmp(override,"cyclone",7)) - return -ENODEV; - - /*make sure we're on a summit box*/ - if(!use_cyclone) return -ENODEV; - - printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); - - /* find base address */ - pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK; - offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); - return -ENODEV; - } - base = *reg; - if(!base){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); - return -ENODEV; - } - - /* setup PMCC */ - pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, 
pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* setup MPCS */ - pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* map in cyclone_timer */ - pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!cyclone_timer){ - printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); - return -ENODEV; - } - - /*quick test to make sure its ticking*/ - for(i=0; i<3; i++){ - u32 old = cyclone_timer[0]; - int stall = 100; - while(stall--) barrier(); - if(cyclone_timer[0] == old){ - printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); - cyclone_timer = 0; - return -ENODEV; - } - } - - init_cpu_khz(); - - /* Everything looks good! 
*/ - return 0; -} - - -static void delay_cyclone(unsigned long loops) -{ - unsigned long bclock, now; - if(!cyclone_timer) - return; - bclock = cyclone_timer[0]; - do { - rep_nop(); - now = cyclone_timer[0]; - } while ((now-bclock) < loops); -} -/************************************************************/ - -/* cyclone timer_opts struct */ -static struct timer_opts timer_cyclone = { - .name = "cyclone", - .mark_offset = mark_offset_cyclone, - .get_offset = get_offset_cyclone, - .monotonic_clock = monotonic_clock_cyclone, - .delay = delay_cyclone, -}; - -struct init_timer_opts __initdata timer_cyclone_init = { - .init = init_cyclone, - .opts = &timer_cyclone, -}; Index: linux.prev/arch/i386/kernel/timers/timer_hpet.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_hpet.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "io_ports.h" -#include "mach_timer.h" -#include - -static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ -static unsigned long hpet_last; /* hpet counter value at last tick*/ -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * 
ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better percision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_khz) -{ - cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static unsigned long long monotonic_clock_hpet(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -static unsigned long get_offset_hpet(void) -{ - register unsigned long eax, edx; - - eax = hpet_readl(HPET_COUNTER); - eax -= hpet_last; /* hpet delta */ - eax = min(hpet_tick, eax); - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - * - * Using a mull instead of a divl saves some cycles in critical path. 
- */ - ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax); - - /* our adjusted time offset in microseconds */ - return edx; -} - -static void mark_offset_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - offset = hpet_readl(HPET_COUNTER); - if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { - int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; - jiffies_64 += lost_ticks; - } - hpet_last = offset; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); -} - -static void delay_hpet(unsigned long loops) -{ - unsigned long hpet_start, hpet_end; - unsigned long eax; - - /* loops is the number of cpu cycles. Convert it to hpet clocks */ - ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops); - - hpet_start = hpet_readl(HPET_COUNTER); - do { - rep_nop(); - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < (loops)); -} - -static struct timer_opts timer_hpet; - -static int __init init_hpet(char* override) -{ - unsigned long result, remain; - - /* check clock override */ - if (override[0] && strncmp(override,"hpet",4)) - return -ENODEV; - - if (!is_hpet_enabled()) - return -ENODEV; - - printk("Using HPET for gettimeofday\n"); - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. 
- */ - { unsigned long eax=0, edx=1000; - ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, - eax, edx); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz); - } - /* set this only when cpu_has_tsc */ - timer_hpet.read_timer = read_timer_tsc; - } - - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - hpet_usec_quotient = result; - - return 0; -} - -static int hpet_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - hpet_last = hpet_readl(HPET_COUNTER); - write_sequnlock(&monotonic_lock); - return 0; -} -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_hpet __read_mostly = { - .name = "hpet", - .mark_offset = mark_offset_hpet, - .get_offset = get_offset_hpet, - .monotonic_clock = monotonic_clock_hpet, - .delay = delay_hpet, - .resume = hpet_resume, -}; - -struct init_timer_opts __initdata timer_hpet_init = { - .init = init_hpet, - .opts = &timer_hpet, -}; Index: linux.prev/arch/i386/kernel/timers/timer_none.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_none.c +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include - -static void mark_offset_none(void) -{ - /* nothing needed */ -} - -static unsigned long get_offset_none(void) -{ - return 0; -} - -static unsigned long long monotonic_clock_none(void) -{ - return 0; -} - -static void delay_none(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - 
-/* none timer_opts struct */ -struct timer_opts timer_none = { - .name = "none", - .mark_offset = mark_offset_none, - .get_offset = get_offset_none, - .monotonic_clock = monotonic_clock_none, - .delay = delay_none, -}; Index: linux.prev/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_pit.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "do_timer.h" -#include "io_ports.h" - -static int count_p; /* counter in get_offset_pit() */ - -static int __init init_pit(char* override) -{ - /* check clock override */ - if (override[0] && strncmp(override,"pit",3)) - printk(KERN_ERR "Warning: clock= override failed. Defaulting " - "to PIT\n"); - init_cpu_khz(); - count_p = LATCH; - return 0; -} - -static void mark_offset_pit(void) -{ - /* nothing needed */ -} - -static unsigned long long monotonic_clock_pit(void) -{ - return 0; -} - -static void delay_pit(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - - -/* This function must be called with xtime_lock held. - * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs - * - * However, the pc-audio speaker driver changes the divisor so that - * it gets interrupted rather more often - it loads 64 into the - * counter rather than 11932! This has an adverse impact on - * do_gettimeoffset() -- it stops working! What is also not - * good is that the interval that our timer function gets called - * is no longer 10.0002 ms, but 9.9767 ms. To get around this - * would require using a different timing source. 
Maybe someone - * could use the RTC - I know that this can interrupt at frequencies - * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix - * it so that at startup, the timer code in sched.c would select - * using either the RTC or the 8253 timer. The decision would be - * based on whether there was any other device around that needed - * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz, - * and then do some jiggery to have a version of do_timer that - * advanced the clock by 1/1024 s. Every time that reached over 1/100 - * of a second, then do all the old code. If the time was kept correct - * then do_gettimeoffset could just return 0 - there is no low order - * divider that can be accessed. - * - * Ideally, you would be able to use the RTC for the speaker driver, - * but it appears that the speaker driver really needs interrupt more - * often than every 120 us or so. - * - * Anyway, this needs more thought.... pjsg (1993-08-28) - * - * If you are really that interested, you should be reading - * comp.protocols.time.ntp! - */ - -static unsigned long get_offset_pit(void) -{ - int count; - unsigned long flags; - static unsigned long jiffies_p = 0; - - /* - * cache volatile jiffies temporarily; we have xtime_lock. - */ - unsigned long jiffies_t; - - spin_lock_irqsave(&i8253_lock, flags); - /* timer count may underflow right here */ - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - - /* - * We do this guaranteed double memory access instead of a _p - * postfix in the previous port access. Wheee, hackady hack - */ - jiffies_t = jiffies; - - count |= inb_p(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * avoiding timer inconsistencies (they are rare, but they happen)... 
- * there are two kinds of problems that must be avoided here: - * 1. the timer counter underflows - * 2. hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) - */ - - if( jiffies_t == jiffies_p ) { - if( count > count_p ) { - /* the nutcase */ - count = do_timer_overflow(count); - } - } else - jiffies_p = jiffies_t; - - count_p = count; - - spin_unlock_irqrestore(&i8253_lock, flags); - - count = ((LATCH-1) - count) * TICK_SIZE; - count = (count + LATCH/2) / LATCH; - - return count; -} - - -/* tsc timer_opts struct */ -struct timer_opts timer_pit = { - .name = "pit", - .mark_offset = mark_offset_pit, - .get_offset = get_offset_pit, - .monotonic_clock = monotonic_clock_pit, - .delay = delay_pit, -}; - -struct init_timer_opts __initdata timer_pit_init = { - .init = init_pit, - .opts = &timer_pit, -}; - -void setup_pit_timer(void) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - udelay(10); - outb(LATCH >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); -} Index: linux.prev/arch/i386/kernel/timers/timer_pm.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_pm.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * southbridges as primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. 
- */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "mach_timer.h" - -/* Number of PMTMR ticks expected during calibration run */ -#define PMTMR_TICKS_PER_SEC 3579545 -#define PMTMR_EXPECTED_RATE \ - ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) - - -/* The I/O port the PMTMR resides at. - * The location is detected during setup_arch(), - * in arch/i386/acpi/boot.c */ -u32 pmtmr_ioport = 0; - - -/* value of the Power timer at last timer interrupt */ -static u32 offset_tick; -static u32 offset_delay; - -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -/*helper function to safely read acpi pm timesource*/ -static inline u32 read_pmtmr(void) -{ - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; -} - - -/* - * Some boards have the PMTMR running way too fast. We check - * the PMTMR rate against PIT channel 2 to catch these cases. 
- */ -static int verify_pmtmr_rate(void) -{ - u32 value1, value2; - unsigned long count, delta; - - mach_prepare_counter(); - value1 = read_pmtmr(); - mach_countup(&count); - value2 = read_pmtmr(); - delta = (value2 - value1) & ACPI_PM_MASK; - - /* Check that the PMTMR delta is within 5% of what we expect */ - if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || - delta > (PMTMR_EXPECTED_RATE * 21) / 20) { - printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); - return -1; - } - - return 0; -} - - -static int init_pmtmr(char* override) -{ - u32 value1, value2; - unsigned int i; - - if (override[0] && strncmp(override,"pmtmr",5)) - return -ENODEV; - - if (!pmtmr_ioport) - return -ENODEV; - - /* we use the TSC for delay_pmtmr, so make sure it exists */ - if (!cpu_has_tsc) - return -ENODEV; - - /* "verify" this timing source */ - value1 = read_pmtmr(); - for (i = 0; i < 10000; i++) { - value2 = read_pmtmr(); - if (value2 == value1) - continue; - if (value2 > value1) - goto pm_good; - if ((value2 < value1) && ((value2) < 0xFFF)) - goto pm_good; - printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); - return -EINVAL; - } - printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); - return -ENODEV; - -pm_good: - if (verify_pmtmr_rate() != 0) - return -ENODEV; - - init_cpu_khz(); - return 0; -} - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. 
- */ - cycles *= 286; - return (cycles >> 10); -} - -/* - * this gets called during each timer interrupt - * - Called while holding the writer xtime_lock - */ -static void mark_offset_pmtmr(void) -{ - u32 lost, delta, last_offset; - static int first_run = 1; - last_offset = offset_tick; - - write_seqlock(&monotonic_lock); - - offset_tick = read_pmtmr(); - - /* calculate tick interval */ - delta = (offset_tick - last_offset) & ACPI_PM_MASK; - - /* convert to usecs */ - delta = cyc2us(delta); - - /* update the monotonic base value */ - monotonic_base += delta * NSEC_PER_USEC; - write_sequnlock(&monotonic_lock); - - /* convert to ticks */ - delta += offset_delay; - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - - /* compensate for lost ticks */ - if (lost >= 2) - jiffies_64 += lost - 1; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } -} - -static int pmtmr_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - offset_tick = read_pmtmr(); - write_sequnlock(&monotonic_lock); - return 0; -} - -static unsigned long long monotonic_clock_pmtmr(void) -{ - u32 last_offset, this_offset; - unsigned long long base, ret; - unsigned seq; - - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = offset_tick; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the pmtmr */ - this_offset = read_pmtmr(); - - /* convert to nanoseconds */ - ret = (this_offset - last_offset) & ACPI_PM_MASK; - ret = base + (cyc2us(ret) * NSEC_PER_USEC); - return ret; -} - -static void delay_pmtmr(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - - -/* - * get the offset (in microseconds) from the last call to mark_offset() - 
* - Called holding a reader xtime_lock - */ -static unsigned long get_offset_pmtmr(void) -{ - u32 now, offset, delta = 0; - - offset = offset_tick; - now = read_pmtmr(); - delta = (now - offset)&ACPI_PM_MASK; - - return (unsigned long) offset_delay + cyc2us(delta); -} - - -/* acpi timer_opts struct */ -static struct timer_opts timer_pmtmr = { - .name = "pmtmr", - .mark_offset = mark_offset_pmtmr, - .get_offset = get_offset_pmtmr, - .monotonic_clock = monotonic_clock_pmtmr, - .delay = delay_pmtmr, - .read_timer = read_timer_tsc, - .resume = pmtmr_resume, -}; - -struct init_timer_opts __initdata timer_pmtmr_init = { - .init = init_pmtmr, - .opts = &timer_pmtmr, -}; - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Dominik Brodowski "); -MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); Index: linux.prev/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_tsc.c +++ /dev/null @@ -1,600 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - * - * 2004-06-25 Jesper Juhl - * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4 - * failing to inline. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -/* processor.h for distable_tsc flag */ -#include - -#include "io_ports.h" -#include "mach_timer.h" - -#include -#include - -#ifdef CONFIG_HPET_TIMER -static unsigned long hpet_usec_quotient; -static unsigned long hpet_last; -static struct timer_opts timer_tsc; -#endif - -static inline void cpufreq_delayed_get(void); - -int tsc_disable __devinitdata = 0; - -static int use_tsc; -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better percision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
- */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_khz) -{ - cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static int count2; /* counter for mark_offset_tsc() */ - -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. - */ -static unsigned long fast_gettimeoffset_quotient; - -static unsigned long get_offset_tsc(void) -{ - register unsigned long eax, edx; - - /* Read the Time Stamp Counter */ - - rdtsc(eax,edx); - - /* .. relative to previous jiffy (32 bits is enough) */ - eax -= last_tsc_low; /* tsc_low delta */ - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. - */ - - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + edx; -} - -static unsigned long long monotonic_clock_tsc(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -/* - * Scheduler clock - returns current time in nanosec units. 
- */ -unsigned long long sched_clock(void) -{ - unsigned long long this_offset; - - /* - * In the NUMA case we dont use the TSC as they are not - * synchronized across all CPUs. - */ -#ifndef CONFIG_NUMA - if (!use_tsc) -#endif - /* no locking but a rare wrong value is not a big deal */ - return jiffies_64 * (1000000000 / HZ); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return cycles_2_ns(this_offset); -} - -static void delay_tsc(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -#ifdef CONFIG_HPET_TIMER -static void mark_offset_tsc_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset, temp, hpet_current; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. 
-arca - */ - /* read Pentium cycle counter */ - - hpet_current = hpet_readl(HPET_COUNTER); - rdtsc(last_tsc_low, last_tsc_high); - - /* lost tick compensation */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { - int lost_ticks = (offset - hpet_last) / hpet_tick; - jiffies_64 += lost_ticks; - } - hpet_last = hpet_current; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - */ - delay_at_last_interrupt = hpet_current - offset; - ASM_MUL64_REG(temp, delay_at_last_interrupt, - hpet_usec_quotient, delay_at_last_interrupt); -} -#endif - - -#ifdef CONFIG_CPU_FREQ -#include - -static unsigned int cpufreq_delayed_issched = 0; -static unsigned int cpufreq_init = 0; -static struct work_struct cpufreq_delayed_get_work; - -static void handle_cpufreq_delayed_get(void *v) -{ - unsigned int cpu; - for_each_online_cpu(cpu) { - cpufreq_get(cpu); - } - cpufreq_delayed_issched = 0; -} - -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static inline void cpufreq_delayed_get(void) -{ - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); - schedule_work(&cpufreq_delayed_get_work); - } -} - -/* If the CPU frequency is scaled, TSC-based delays will need a different - * loops_per_jiffy value to function properly. 
- */ - -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; - -#ifndef CONFIG_SMP -static unsigned long fast_gettimeoffset_ref = 0; -static unsigned int cpu_khz_ref = 0; -#endif - -static int -time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - - if (val != CPUFREQ_RESUMECHANGE) - write_seqlock_irq(&xtime_lock); - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; -#ifndef CONFIG_SMP - fast_gettimeoffset_ref = fast_gettimeoffset_quotient; - cpu_khz_ref = cpu_khz; -#endif - } - - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); -#ifndef CONFIG_SMP - if (cpu_khz) - cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (use_tsc) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { - fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz); - } - } -#endif - } - - if (val != CPUFREQ_RESUMECHANGE) - write_sequnlock_irq(&xtime_lock); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - - -static int __init cpufreq_tsc(void) -{ - int ret; - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); - ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (!ret) - cpufreq_init = 1; - return ret; -} -core_initcall(cpufreq_tsc); - -#else /* CONFIG_CPU_FREQ */ -static inline void cpufreq_delayed_get(void) { return; } -#endif - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned int cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - init_cpu_khz(); - cpu_data[0].loops_per_jiffy = - 
cpufreq_scale(cpu_data[0].loops_per_jiffy, - cpu_khz_old, - cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} -EXPORT_SYMBOL(recalibrate_cpu_khz); - -static void mark_offset_tsc(void) -{ - unsigned long lost,delay; - unsigned long delta = last_tsc_low; - int count; - int countmp; - static int count1 = 0; - unsigned long long this_offset, last_offset; - static int lost_count = 0; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. -arca - */ - - /* read Pentium cycle counter */ - - rdtsc(last_tsc_low, last_tsc_high); - - spin_lock(&i8253_lock); - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb(PIT_CH0) << 8; - - /* - * VIA686a test code... 
reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - spin_unlock(&i8253_lock); - - if (pit_latch_buggy) { - /* get center value of last 3 time lutch */ - if ((count2 >= count && count >= count1) - || (count1 >= count && count >= count2)) { - count2 = count1; count1 = count; - } else if ((count1 >= count2 && count2 >= count) - || (count >= count2 && count2 >= count1)) { - countmp = count;count = count2; - count2 = count1;count1 = countmp; - } else { - count2 = count1; count1 = count; count = count1; - } - } - - /* lost tick compensation */ - delta = last_tsc_low - delta; - { - register unsigned long eax, edx; - eax = delta; - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - delta = edx; - } - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) { - jiffies_64 += lost-1; - - /* sanity check to ensure we're not always losing ticks */ - if (lost_count++ > 100) { - printk(KERN_WARNING "Losing too many ticks!\n"); - printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); - printk(KERN_WARNING "Possible reasons for this are:\n"); - printk(KERN_WARNING " You're running with Speedstep,\n"); - printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); - printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); - printk(KERN_WARNING "Falling back to a sane timesource now.\n"); - - clock_fallback(); - } - /* ... 
but give the TSC a fair chance */ - if (lost_count > 25) - cpufreq_delayed_get(); - } else - lost_count = 0; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - /* catch corner case where tick rollover occured - * between tsc and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static int __init init_tsc(char* override) -{ - - /* check clock override */ - if (override[0] && strncmp(override,"tsc",3)) { -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) { - printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n"); - } else -#endif - { - return -ENODEV; - } - } - - /* - * If we have APM enabled or the CPU clock speed is variable - * (CPU stops clock on HLT or slows clock to save power) - * then the TSC timestamps may diverge by up to 1 jiffy from - * 'real time' but nothing will break. - * The most frequent case is that the CPU is "woken" from a halt - * state by the timer interrupt itself, so we get 0 error. In the - * rare cases where a driver would "wake" the CPU and request a - * timestamp, the maximum error is < 1 jiffy. But timestamps are - * still perfectly ordered. - * Note that the TSC counter will be reset if APM suspends - * to disk; this won't break the kernel, though, 'cuz we're - * smart. See arch/i386/kernel/apm.c. - */ - /* - * Firstly we have to do a CPU check for chips with - * a potentially buggy TSC. At this point we haven't run - * the ident/bugs checks so we must run this hook as it - * may turn off the TSC flag. - * - * NOTE: this doesn't yet handle SMP 486 machines where only - * some CPU's have a TSC. 
Thats never worked and nobody has - * moaned if you have the only one in the world - you fix it! - */ - - count2 = LATCH; /* initialize counter for mark_offset_tsc() */ - - if (cpu_has_tsc) { - unsigned long tsc_quotient; -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) { - unsigned long result, remain; - printk("Using TSC for gettimeofday\n"); - tsc_quotient = calibrate_tsc_hpet(NULL); - timer_tsc.mark_offset = &mark_offset_tsc_hpet; - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_tsc_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, - 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - - hpet_usec_quotient = result; - } else -#endif - { - tsc_quotient = calibrate_tsc(); - } - - if (tsc_quotient) { - fast_gettimeoffset_quotient = tsc_quotient; - use_tsc = 1; - /* - * We could be more selective here I suspect - * and just enable this for the next intel chips ? - */ - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz); - return 0; - } - } - return -ENODEV; -} - -static int tsc_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) - hpet_last = hpet_readl(HPET_COUNTER); -#endif - write_sequnlock(&monotonic_lock); - return 0; -} - -#ifndef CONFIG_X86_TSC -/* disable flag for tsc. 
Takes effect by clearing the TSC cpu flag - * in cpu/common.c */ -static int __init tsc_setup(char *str) -{ - tsc_disable = 1; - return 1; -} -#else -static int __init tsc_setup(char *str) -{ - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC.\n"); - return 1; -} -#endif -__setup("notsc", tsc_setup); - - - -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_tsc = { - .name = "tsc", - .mark_offset = mark_offset_tsc, - .get_offset = get_offset_tsc, - .monotonic_clock = monotonic_clock_tsc, - .delay = delay_tsc, - .read_timer = read_timer_tsc, - .resume = tsc_resume, -}; - -struct init_timer_opts __initdata timer_tsc_init = { - .init = init_tsc, - .opts = &timer_tsc, -}; Index: linux.prev/arch/i386/kernel/traps.c =================================================================== --- linux.prev.orig/arch/i386/kernel/traps.c +++ linux.prev/arch/i386/kernel/traps.c @@ -93,7 +93,7 @@ asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +static DEFINE_RAW_SPINLOCK(die_notifier_lock); int register_die_notifier(struct notifier_block *nb) { @@ -116,22 +116,27 @@ static inline unsigned long print_contex unsigned long *stack, unsigned long ebp) { unsigned long addr; +#ifndef CONFIG_FRAME_POINTER + unsigned long prev_frame; +#endif -#ifdef CONFIG_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printk(" [<%08lx>] ", addr); print_symbol("%s", addr); - printk("\n"); + printk(" (%ld)\n", *(unsigned long *)ebp - ebp); ebp = *(unsigned long *)ebp; } #else + prev_frame = (unsigned long)stack; while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) { printk(" [<%08lx>]", addr); print_symbol(" %s", addr); - printk("\n"); + printk(" (%ld)\n", (unsigned long)stack 
- prev_frame); + prev_frame = (unsigned long)stack; } } #endif @@ -163,6 +168,8 @@ void show_trace(struct task_struct *task break; printk(" =======================\n"); } + print_traces(task); + show_held_locks(task); } void show_stack(struct task_struct *task, unsigned long *esp) @@ -201,6 +208,12 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_LATENCY_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; @@ -225,10 +238,17 @@ void show_registers(struct pt_regs *regs regs->eax, regs->ebx, regs->ecx, regs->edx); printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", + printk("ds: %04x es: %04x ss: %04x preempt: %08x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss, preempt_count()); + printk("Process %s (pid: %d, threadinfo=%p task=%p", current->comm, current->pid, current_thread_info(), current); + + if (in_kernel) + printk(" stack_left=%ld worst_left=%ld)", + (esp & (THREAD_SIZE-1))-sizeof(struct thread_info), + worst_stack_left); + else + printk(")"); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. 
@@ -297,11 +317,11 @@ bug: void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -379,6 +399,11 @@ static void __kprobes do_trap(int trapnr if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif + trap_signal: { if (info) force_sig_info(signr, info, tsk); @@ -509,7 +534,7 @@ fastcall void __kprobes do_general_prote return; gp_in_vm86: - local_irq_enable(); + raw_local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; @@ -563,10 +588,12 @@ static void unknown_nmi_error(unsigned c printk("Do you have a strange power saving mode enabled?\n"); } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { + deadlock_trace_off(); + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == NOTIFY_STOP) return; @@ -594,10 +621,11 @@ void die_nmi (struct pt_regs *regs, cons crash_kexec(regs); } + nmi_exit(); do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static void notrace default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -616,6 +644,7 @@ static void default_do_nmi(struct pt_reg */ if (nmi_watchdog) { nmi_watchdog_tick(regs); +// trace_special(6, 1, 0); return; } #endif @@ -635,18 +664,19 @@ static void default_do_nmi(struct pt_reg reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +static notrace int dummy_nmi_callback(struct pt_regs * regs, int cpu) { return 0; } static nmi_callback_t nmi_callback = dummy_nmi_callback; -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall notrace void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); + 
nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags); cpu = smp_processor_id(); @@ -717,7 +747,7 @@ fastcall void __kprobes do_debug(struct return; /* It's safe to allow irq's after DR6 has been saved */ if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + raw_local_irq_enable(); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { Index: linux.prev/arch/i386/kernel/tsc.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/tsc.c @@ -0,0 +1,395 @@ +/* + * This code largely moved from arch/i386/kernel/timer/timer_tsc.c + * which was originally moved from arch/i386/kernel/time.c. + * See comments there for proper credits. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mach_timer.h" + +/* + * On some systems the TSC frequency does not + * change with the cpu frequency. So we need + * an extra value to store the TSC freq + */ +unsigned int tsc_khz; + +int tsc_disable __initdata = 0; + +#ifdef CONFIG_X86_TSC +static int __init tsc_setup(char *str) +{ + printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " + "cannot disable TSC.\n"); + return 1; +} +#else +/* + * disable flag for tsc. 
Takes effect by clearing the TSC cpu flag + * in cpu/common.c + */ +static int __init tsc_setup(char *str) +{ + tsc_disable = 1; + + return 1; +} +#endif + +__setup("notsc", tsc_setup); + +/* + * code to mark and check if the TSC is unstable + * due to cpufreq or due to unsynced TSCs + */ +static int tsc_unstable; + +static inline int check_tsc_unstable(void) +{ + return tsc_unstable; +} + +void mark_tsc_unstable(void) +{ + tsc_unstable = 1; +} +EXPORT_SYMBOL_GPL(mark_tsc_unstable); + +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * + * -johnstul@us.ibm.com "math is hard, lets go shopping!" + */ +static unsigned long cyc2ns_scale; + +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ + +static inline void set_cyc2ns_scale(unsigned long cpu_khz) +{ + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; +} + +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + unsigned long long this_offset; + + /* + * in the NUMA case we dont use the TSC as they are not + * synchronized across all CPUs. 
+ */ +#ifndef CONFIG_NUMA + if (!cpu_khz || check_tsc_unstable()) +#endif + /* no locking but a rare wrong value is not a big deal */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); + + /* read the Time Stamp Counter: */ + rdtscll(this_offset); + + /* return the value in ns */ + return cycles_2_ns(this_offset); +} + +static unsigned long calculate_cpu_khz(void) +{ + unsigned long long start, end; + unsigned long count; + u64 delta64; + int i; + unsigned long flags; + + raw_local_irq_save(flags); + + /* run 3 times to ensure the cache is warm */ + for (i = 0; i < 3; i++) { + mach_prepare_counter(); + rdtscll(start); + mach_countup(&count); + rdtscll(end); + } + /* + * Error: ECTCNEVERSET + * The CTC wasn't reliable: we got a hit on the very first read, + * or the CPU was so fast/slow that the quotient wouldn't fit in + * 32 bits.. + */ + if (count <= 1) + goto err; + + delta64 = end - start; + + /* cpu freq too fast: */ + if (delta64 > (1ULL<<32)) + goto err; + + /* cpu freq too slow: */ + if (delta64 <= CALIBRATE_TIME_MSEC) + goto err; + + delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ + do_div(delta64,CALIBRATE_TIME_MSEC); + + raw_local_irq_restore(flags); + return (unsigned long)delta64; +err: + raw_local_irq_restore(flags); + return 0; +} + +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + cpu_data[0].loops_per_jiffy = + cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_khz_old, cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} + +EXPORT_SYMBOL(recalibrate_cpu_khz); + +void tsc_init(void) +{ + if (!cpu_has_tsc || tsc_disable) + return; + + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + + if (!cpu_khz) + return; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + set_cyc2ns_scale(cpu_khz); + use_tsc_delay(); +} + 
+#ifdef CONFIG_CPU_FREQ + +static unsigned int cpufreq_delayed_issched = 0; +static unsigned int cpufreq_init = 0; +static struct work_struct cpufreq_delayed_get_work; + +static void handle_cpufreq_delayed_get(void *v) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) + cpufreq_get(cpu); + + cpufreq_delayed_issched = 0; +} + +/* + * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries + * to verify the CPU frequency the timing core thinks the CPU is running + * at is still correct. + */ +static inline void cpufreq_delayed_get(void) +{ + if (cpufreq_init && !cpufreq_delayed_issched) { + cpufreq_delayed_issched = 1; + printk(KERN_DEBUG "Checking if CPU frequency changed.\n"); + schedule_work(&cpufreq_delayed_get_work); + } +} + +/* + * if the CPU frequency is scaled, TSC-based delays will need a different + * loops_per_jiffy value to function properly. + */ +static unsigned int ref_freq = 0; +static unsigned long loops_per_jiffy_ref = 0; +static unsigned long cpu_khz_ref = 0; + +static int +time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + + if (val != CPUFREQ_RESUMECHANGE) + write_seqlock_irq(&xtime_lock); + + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; + cpu_khz_ref = cpu_khz; + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + + if (cpu_khz) { + + if (num_online_cpus() == 1) + cpu_khz = cpufreq_scale(cpu_khz_ref, + ref_freq, freq->new); + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { + tsc_khz = cpu_khz; + set_cyc2ns_scale(cpu_khz); + /* + * TSC based sched_clock turns + * to junk w/ cpufreq + */ + mark_tsc_unstable(); + } + } + } + + if (val != CPUFREQ_RESUMECHANGE) + 
write_sequnlock_irq(&xtime_lock); + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + .notifier_call = time_cpufreq_notifier +}; + +static int __init cpufreq_tsc(void) +{ + int ret; + + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + if (!ret) + cpufreq_init = 1; + + return ret; +} + +core_initcall(cpufreq_tsc); + +#endif + +/* clock source code */ + +static unsigned long current_tsc_khz = 0; +static int tsc_update_callback(void); + +static cycle_t read_tsc(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .mask = (cycle_t)-1, + .mult = 0, /* to be set */ + .shift = 22, + .update_callback = tsc_update_callback, + .is_continuous = 1, +}; + +static int tsc_update_callback(void) +{ + int change = 0; + + /* check to see if we should switch to the safe clocksource: */ + if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { + clocksource_tsc.rating = 50; + reselect_clocksource(); + change = 1; + } + + /* only update if tsc_khz has changed: */ + if (current_tsc_khz != tsc_khz) { + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + change = 1; + } + + return change; +} + +/* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. + */ +static __init int unsynchronized_tsc(void) +{ + /* + * Intel systems are normally all synchronized. 
+ * Exceptions must mark TSC as unstable: + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return 0; + + /* assume multi socket systems are not synchronized: */ + return num_possible_cpus() > 1; +} + +/* NUMAQ can't use TSC: */ +static int __init init_tsc_clocksource(void) +{ + /* TSC initialization is done in arch/i386/kernel/tsc.c */ + if (cpu_has_tsc && tsc_khz && !tsc_disable) { + if (unsynchronized_tsc()) /* lower rating if unsynced */ + mark_tsc_unstable(); + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + register_clocksource(&clocksource_tsc); + } + + return 0; +} + +module_init(init_tsc_clocksource); Index: linux.prev/arch/i386/kernel/vm86.c =================================================================== --- linux.prev.orig/arch/i386/kernel/vm86.c +++ linux.prev/arch/i386/kernel/vm86.c @@ -105,9 +105,10 @@ struct pt_regs * fastcall save_v86_state * from process context. Enable interrupts here, before trying * to access user space. 
*/ - local_irq_enable(); + raw_local_irq_enable(); if (!current->thread.vm86_info) { + raw_local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } Index: linux.prev/arch/i386/lib/bitops.c =================================================================== --- linux.prev.orig/arch/i386/lib/bitops.c +++ linux.prev/arch/i386/lib/bitops.c @@ -68,3 +68,37 @@ int find_next_zero_bit(const unsigned lo return (offset + set + res); } EXPORT_SYMBOL(find_next_zero_bit); + + +/* + * rw spinlock fallbacks + */ +#if defined(CONFIG_SMP) +asm( +".section .sched.text\n" +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n\t" + LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n\t" + "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jne 1b\n\t" + LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jnz __write_lock_failed\n\t" + "ret" +); + +asm( +".section .sched.text\n" +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n\t" + LOCK "incl (%eax)\n" +"1: rep; nop\n\t" + "cmpl $1,(%eax)\n\t" + "js 1b\n\t" + LOCK "decl (%eax)\n\t" + "js __read_lock_failed\n\t" + "ret" +); +#endif Index: linux.prev/arch/i386/lib/delay.c =================================================================== --- linux.prev.orig/arch/i386/lib/delay.c +++ linux.prev/arch/i386/lib/delay.c @@ -10,43 +10,93 @@ * we have to worry about. 
*/ +#include +#include #include #include #include -#include + #include #include #include #ifdef CONFIG_SMP -#include +# include #endif -extern struct timer_opts* timer; +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) +{ + int d0; + + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + +/* TSC based delay: */ +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +/* + * Since we calibrate only once at boot, this + * function should be set once at boot and not changed + */ +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int read_current_timer(unsigned long *timer_val) +{ + if (delay_fn == delay_tsc) { + rdtscl(*timer_val); + return 0; + } + return -1; +} void __delay(unsigned long loops) { - cur_timer->delay(loops); + delay_fn(loops); } inline void __const_udelay(unsigned long xloops) { int d0; + xloops *= 4; __asm__("mull %0" :"=d" (xloops), "=&a" (d0) - :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); - __delay(++xloops); + :"1" (xloops), "0" + (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + + __delay(++xloops); } void __udelay(unsigned long usecs) { - __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ } void __ndelay(unsigned long nsecs) { - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } EXPORT_SYMBOL(__delay); Index: linux.prev/arch/i386/mach-default/setup.c =================================================================== --- linux.prev.orig/arch/i386/mach-default/setup.c +++ linux.prev/arch/i386/mach-default/setup.c @@ 
-34,7 +34,7 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; /** * intr_init_hook - post gate setup interrupt initialisation @@ -78,8 +78,6 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; - /** * time_init_hook - do any specific initialisations for the system timer. * @@ -89,7 +87,6 @@ static struct irqaction irq0 = { timer_ **/ void __init time_init_hook(void) { - setup_irq(0, &irq0); } #ifdef CONFIG_MCA Index: linux.prev/arch/i386/mach-visws/setup.c =================================================================== --- linux.prev.orig/arch/i386/mach-visws/setup.c +++ linux.prev/arch/i386/mach-visws/setup.c @@ -113,7 +113,7 @@ void __init pre_setup_arch_hook() static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .name = "timer", }; Index: linux.prev/arch/i386/mach-visws/visws_apic.c =================================================================== --- linux.prev.orig/arch/i386/mach-visws/visws_apic.c +++ linux.prev/arch/i386/mach-visws/visws_apic.c @@ -260,11 +260,13 @@ out_unlock: static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = SA_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = SA_NODELAY, }; Index: linux.prev/arch/i386/mach-voyager/setup.c =================================================================== --- linux.prev.orig/arch/i386/mach-voyager/setup.c +++ linux.prev/arch/i386/mach-voyager/setup.c @@ -16,7 +16,7 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 
= { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init intr_init_hook(void) { @@ -39,7 +39,7 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; void __init time_init_hook(void) { Index: linux.prev/arch/i386/mm/fault.c =================================================================== --- linux.prev.orig/arch/i386/mm/fault.c +++ linux.prev/arch/i386/mm/fault.c @@ -39,6 +39,8 @@ void bust_spinlocks(int yes) int loglevel_save = console_loglevel; if (yes) { + stop_trace(); + zap_rt_locks(); oops_in_progress = 1; return; } @@ -224,8 +226,8 @@ fastcall void do_invalid_op(struct pt_re * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -fastcall void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long error_code) +fastcall notrace void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; @@ -236,13 +238,14 @@ fastcall void __kprobes do_page_fault(st /* get the address */ address = read_cr2(); + trace_special(regs->eip, error_code, address); if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) - local_irq_enable(); + raw_local_irq_enable(); tsk = current; @@ -449,9 +452,9 @@ no_context: } #endif if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + printk(KERN_ALERT "BUG: Unable to handle kernel NULL pointer dereference"); else - printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(KERN_ALERT "BUG: Unable to handle kernel paging request"); 
printk(" at virtual address %08lx\n",address); printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); Index: linux.prev/arch/i386/mm/highmem.c =================================================================== --- linux.prev.orig/arch/i386/mm/highmem.c +++ linux.prev/arch/i386/mm/highmem.c @@ -18,6 +18,27 @@ void kunmap(struct page *page) kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} + + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -26,7 +47,7 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -48,7 +69,7 @@ void *kmap_atomic(struct page *page, enu return (void*) vaddr; } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { #ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; @@ -78,7 +99,7 @@ void kunmap_atomic(void *kvaddr, enum km /* This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. 
*/ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -93,7 +114,7 @@ void *kmap_atomic_pfn(unsigned long pfn, return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -108,6 +129,7 @@ struct page *kmap_atomic_to_page(void *p EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_to_page); Index: linux.prev/arch/i386/mm/init.c =================================================================== --- linux.prev.orig/arch/i386/mm/init.c +++ linux.prev/arch/i386/mm/init.c @@ -44,7 +44,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int noinline do_test_wp_bit(void); Index: linux.prev/arch/i386/mm/pageattr.c =================================================================== --- linux.prev.orig/arch/i386/mm/pageattr.c +++ linux.prev/arch/i386/mm/pageattr.c @@ -207,6 +207,9 @@ void kernel_map_pages(struct page *page, { if (PageHighMem(page)) return; + if (!enable) + check_no_locks_freed(page_address(page), page_address(page+numpages)); + /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. */ Index: linux.prev/arch/i386/mm/pgtable.c =================================================================== --- linux.prev.orig/arch/i386/mm/pgtable.c +++ linux.prev/arch/i386/mm/pgtable.c @@ -183,7 +183,7 @@ void pmd_ctor(void *pmd, kmem_cache_t *c * recommendations and having no core impact whatsoever. 
* -- wli */ -DEFINE_SPINLOCK(pgd_lock); +DEFINE_RAW_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) Index: linux.prev/arch/i386/oprofile/Kconfig =================================================================== --- linux.prev.orig/arch/i386/oprofile/Kconfig +++ linux.prev/arch/i386/oprofile/Kconfig @@ -15,3 +15,6 @@ config OPROFILE If unsure, say N. +config PROFILE_NMI + bool + default y Index: linux.prev/arch/i386/pci/Makefile =================================================================== --- linux.prev.orig/arch/i386/pci/Makefile +++ linux.prev/arch/i386/pci/Makefile @@ -4,8 +4,9 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o obj-$(CONFIG_PCI_DIRECT) += direct.o +obj-$(CONFIG_ACPI) += acpi.o + pci-y := fixup.o -pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o Index: linux.prev/arch/i386/pci/direct.c =================================================================== --- linux.prev.orig/arch/i386/pci/direct.c +++ linux.prev/arch/i386/pci/direct.c @@ -211,16 +211,23 @@ static int __init pci_check_type1(void) unsigned int tmp; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -230,17 +237,19 @@ static int __init pci_check_type2(void) unsigned long flags; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - 
pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } Index: linux.prev/arch/i386/pci/pcbios.c =================================================================== --- linux.prev.orig/arch/i386/pci/pcbios.c +++ linux.prev/arch/i386/pci/pcbios.c @@ -70,7 +70,7 @@ static unsigned long bios32_service(unsi unsigned long entry; /* %edx */ unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__("lcall *(%%edi); cld" : "=a" (return_code), "=b" (address), @@ -79,7 +79,7 @@ static unsigned long bios32_service(unsi : "0" (service), "1" (0), "D" (&bios32_indirect)); - local_irq_restore(flags); + raw_local_irq_restore(flags); switch (return_code) { case 0: @@ -110,7 +110,7 @@ static int __devinit check_pcibios(void) if ((pcibios_entry = bios32_service(PCI_SERVICE))) { pci_indirect.address = pcibios_entry + PAGE_OFFSET; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__( "lcall *(%%edi); cld\n\t" "jc 1f\n\t" @@ -123,7 +123,7 @@ static int __devinit check_pcibios(void) : "1" (PCIBIOS_PCI_BIOS_PRESENT), "D" (&pci_indirect) : "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); status = (eax >> 8) & 0xff; hw_mech = eax & 0xff; Index: linux.prev/arch/mips/Kconfig =================================================================== --- linux.prev.orig/arch/mips/Kconfig +++ linux.prev/arch/mips/Kconfig @@ -362,6 +362,7 @@ config MOMENCO_JAGUAR_ATX config MOMENCO_OCELOT bool "Support for Momentum Ocelot board" select DMA_NONCOHERENT + select NO_SPINLOCK select HW_HAS_PCI select IRQ_CPU select IRQ_CPU_RM7K @@ -792,12 +793,21 @@ source "arch/mips/philips/pnx8550/common endmenu +source "kernel/Kconfig.preempt" + config RWSEM_GENERIC_SPINLOCK bool + depends on !PREEMPT_RT 
default y config RWSEM_XCHGADD_ALGORITHM bool + depends on !PREEMPT_RT + +config ASM_SEMAPHORES + bool +# depends on !PREEMPT_RT + default y config GENERIC_CALIBRATE_DELAY bool @@ -832,6 +842,9 @@ config DMA_NEED_PCI_MAP_STATE config OWN_DMA bool +config NO_SPINLOCK + bool + config EARLY_PRINTK bool @@ -1637,10 +1650,6 @@ config MIPS_INSANE_LARGE endmenu -config RWSEM_GENERIC_SPINLOCK - bool - default y - source "init/Kconfig" menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" Index: linux.prev/arch/mips/arc/misc.c =================================================================== --- linux.prev.orig/arch/mips/arc/misc.c +++ linux.prev/arch/mips/arc/misc.c @@ -27,7 +27,7 @@ VOID ArcHalt(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -39,7 +39,7 @@ VOID ArcPowerDown(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -52,7 +52,7 @@ VOID ArcRestart(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -64,7 +64,7 @@ VOID ArcReboot(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -76,7 +76,7 @@ VOID ArcEnterInteractiveMode(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif Index: linux.prev/arch/mips/gt64120/ev64120/irq.c =================================================================== --- linux.prev.orig/arch/mips/gt64120/ev64120/irq.c +++ linux.prev/arch/mips/gt64120/ev64120/irq.c @@ -60,25 +60,25 @@ static void disable_ev64120_irq(unsigned { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (irq_nr >= 8) { // All PCI interrupts are on line 5 or 2 clear_c0_status(9 << 10); } else { clear_c0_status(1 << (irq_nr + 
8)); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void enable_ev64120_irq(unsigned int irq_nr) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (irq_nr >= 8) // All PCI interrupts are on line 5 or 2 set_c0_status(9 << 10); else set_c0_status(1 << (irq_nr + 8)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_ev64120_irq(unsigned int irq) @@ -119,7 +119,7 @@ void gt64120_irq_setup(void) /* Sets the exception_handler array. */ set_except_vector(0, galileo_handle_int); - local_irq_disable(); + raw_local_irq_disable(); /* * Enable timer. Other interrupts will be enabled as they are Index: linux.prev/arch/mips/gt64120/momenco_ocelot/irq.c =================================================================== --- linux.prev.orig/arch/mips/gt64120/momenco_ocelot/irq.c +++ linux.prev/arch/mips/gt64120/momenco_ocelot/irq.c @@ -57,7 +57,7 @@ void __init arch_init_irq(void) * int-handler is not on bootstrap */ clear_c0_status(ST0_IM); - local_irq_disable(); + raw_local_irq_disable(); /* Sets the first-level interrupt dispatcher. 
*/ set_except_vector(0, ocelot_handle_int); Index: linux.prev/arch/mips/ite-boards/generic/irq.c =================================================================== --- linux.prev.orig/arch/mips/ite-boards/generic/irq.c +++ linux.prev/arch/mips/ite-boards/generic/irq.c @@ -171,9 +171,9 @@ void enable_cpu_timer(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); set_c0_status(0x100 << EXT_IRQ5_TO_IP); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init arch_init_irq(void) Index: linux.prev/arch/mips/ite-boards/generic/time.c =================================================================== --- linux.prev.orig/arch/mips/ite-boards/generic/time.c +++ linux.prev/arch/mips/ite-boards/generic/time.c @@ -124,7 +124,7 @@ static unsigned long __init cal_r4koff(v { unsigned int flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Start counter exactly on falling edge of update flag */ while (CMOS_READ(RTC_REG_A) & RTC_UIP); @@ -140,7 +140,7 @@ static unsigned long __init cal_r4koff(v mips_hpt_frequency = read_c0_count(); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return (mips_hpt_frequency / HZ); } @@ -153,11 +153,11 @@ it8172_rtc_get_time(void) /* avoid update-in-progress. */ for (;;) { - local_irq_save(flags); + raw_local_irq_save(flags); if (! (CMOS_READ(RTC_REG_A) & RTC_UIP)) break; /* don't hold intr closed all the time */ - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Read regs. */ @@ -170,7 +170,7 @@ it8172_rtc_get_time(void) hw_to_bin(*rtc_century_reg) * 100; /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return mktime(year, mon, day, hour, min, sec); } @@ -186,11 +186,11 @@ it8172_rtc_set_time(unsigned long t) /* avoid update-in-progress. */ for (;;) { - local_irq_save(flags); + raw_local_irq_save(flags); if (! 
(CMOS_READ(RTC_REG_A) & RTC_UIP)) break; /* don't hold intr closed all the time */ - local_irq_restore(flags); + raw_local_irq_restore(flags); } *rtc_century_reg = bin_to_hw(tm.tm_year/100); @@ -202,7 +202,7 @@ it8172_rtc_set_time(unsigned long t) CMOS_WRITE(bin_to_hw(tm.tm_year%100), RTC_YEAR); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -211,7 +211,7 @@ void __init it8172_time_init(void) { unsigned int est_freq, flags; - local_irq_save(flags); + raw_local_irq_save(flags); saved_control = CMOS_READ(RTC_CONTROL); @@ -225,7 +225,7 @@ void __init it8172_time_init(void) printk("CPU frequency %d.%02d MHz\n", est_freq/1000000, (est_freq%1000000)*100/1000000); - local_irq_restore(flags); + raw_local_irq_restore(flags); rtc_get_time = it8172_rtc_get_time; rtc_set_time = it8172_rtc_set_time; Index: linux.prev/arch/mips/jmr3927/rbhma3100/setup.c =================================================================== --- linux.prev.orig/arch/mips/jmr3927/rbhma3100/setup.c +++ linux.prev/arch/mips/jmr3927/rbhma3100/setup.c @@ -115,7 +115,7 @@ static inline void do_reset(void) static void jmr3927_machine_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); puts("Rebooting..."); do_reset(); } Index: linux.prev/arch/mips/kernel/Makefile =================================================================== --- linux.prev.orig/arch/mips/kernel/Makefile +++ linux.prev/arch/mips/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ - ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \ + ptrace.o reset.o setup.o signal.o syscall.o \ time.o traps.o unaligned.o binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \ @@ -13,6 +13,8 @@ binfmt_irix-objs := irixelf.o irixinv.o obj-$(CONFIG_MODULES) += mips_ksyms.o module.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o + obj-$(CONFIG_CPU_R3000) += r2300_fpu.o r2300_switch.o 
obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o Index: linux.prev/arch/mips/kernel/asm-offsets.c =================================================================== --- linux.prev.orig/arch/mips/kernel/asm-offsets.c +++ linux.prev/arch/mips/kernel/asm-offsets.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include #include #include Index: linux.prev/arch/mips/kernel/cpu-bugs64.c =================================================================== --- linux.prev.orig/arch/mips/kernel/cpu-bugs64.c +++ linux.prev/arch/mips/kernel/cpu-bugs64.c @@ -48,7 +48,7 @@ static inline void mult_sh_align_mod(lon * used for. */ - local_irq_save(flags); + raw_local_irq_save(flags); /* * The following code leads to a wrong result of the first * dsll32 when executed on R4000 rev. 2.2 or 3.0 (PRId @@ -101,7 +101,7 @@ static inline void mult_sh_align_mod(lon "" : "=r" (lv2) : "0" (lv2), "r" (p)); - local_irq_restore(flags); + raw_local_irq_restore(flags); *v1 = lv1; *v2 = lv2; @@ -182,7 +182,7 @@ static inline void check_daddi(void) printk("Checking for the daddi bug... "); - local_irq_save(flags); + raw_local_irq_save(flags); handler = set_except_vector(12, handle_daddi_ov); /* * The following code fails to trigger an overflow exception @@ -208,7 +208,7 @@ static inline void check_daddi(void) : "=r" (v), "=&r" (tmp) : "I" (0xffffffffffffdb9a), "I" (0x1234)); set_except_vector(12, handler); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (daddi_ov) { printk("no.\n"); @@ -217,7 +217,7 @@ static inline void check_daddi(void) printk("yes, workaround... 
"); - local_irq_save(flags); + raw_local_irq_save(flags); handler = set_except_vector(12, handle_daddi_ov); asm volatile( "addiu %1, $0, %2\n\t" @@ -226,7 +226,7 @@ static inline void check_daddi(void) : "=r" (v), "=&r" (tmp) : "I" (0xffffffffffffdb9a), "I" (0x1234)); set_except_vector(12, handler); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (daddi_ov) { printk("yes.\n"); Index: linux.prev/arch/mips/kernel/entry.S =================================================================== --- linux.prev.orig/arch/mips/kernel/entry.S +++ linux.prev/arch/mips/kernel/entry.S @@ -23,7 +23,7 @@ .endm #else .macro preempt_stop - local_irq_disable + mips_raw_local_irq_disable .endm #define resume_kernel restore_all #endif @@ -38,7 +38,7 @@ FEXPORT(ret_from_irq) beqz t0, resume_kernel resume_userspace: - local_irq_disable # make sure we dont miss an + mips_raw_local_irq_disable # make sure we dont miss an # interrupt setting need_resched # between sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -48,7 +48,9 @@ resume_userspace: #ifdef CONFIG_PREEMPT resume_kernel: - local_irq_disable + mips_local_irq_disable + lw t0, kernel_preemption + beqz t0, restore_all lw t0, TI_PRE_COUNT($28) bnez t0, restore_all need_resched: @@ -66,7 +68,7 @@ FEXPORT(ret_from_fork) jal schedule_tail # a0 = task_t *prev FEXPORT(syscall_exit) - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -85,19 +87,21 @@ FEXPORT(restore_partial) # restore part .set at work_pending: - andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS + # a2 is preloaded with TI_FLAGS + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beqz t0, work_notifysig work_resched: + mips_raw_local_irq_enable t0 jal schedule - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals 
dont change between # sampling and return LONG_L a2, TI_FLAGS($28) andi t0, a2, _TIF_WORK_MASK # is there any work to be done # other than syscall tracing? beqz t0, restore_all - andi t0, a2, _TIF_NEED_RESCHED + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bnez t0, work_resched work_notifysig: # deal with pending signals and @@ -113,7 +117,7 @@ syscall_exit_work: li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT and t0, a2 # a2 is preloaded with TI_FLAGS beqz t0, work_pending # trace bit set? - local_irq_enable # could let do_syscall_trace() + mips_raw_local_irq_enable # could let do_syscall_trace() # call schedule() instead move a0, sp li a1, 1 Index: linux.prev/arch/mips/kernel/gdb-stub.c =================================================================== --- linux.prev.orig/arch/mips/kernel/gdb-stub.c +++ linux.prev/arch/mips/kernel/gdb-stub.c @@ -402,7 +402,7 @@ void set_debug_traps(void) unsigned long flags; unsigned char c; - local_irq_save(flags); + raw_local_irq_save(flags); for (ht = hard_trap_info; ht->tt && ht->signo; ht++) saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low); @@ -418,7 +418,7 @@ void set_debug_traps(void) putDebugChar('+'); /* ack it */ initialized = 1; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void restore_debug_traps(void) @@ -426,10 +426,10 @@ void restore_debug_traps(void) struct hard_trap_info *ht; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); for (ht = hard_trap_info; ht->tt && ht->signo; ht++) set_except_vector(ht->tt, saved_vectors[ht->tt]); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -661,12 +661,12 @@ static void kgdb_wait(void *arg) unsigned flags; int cpu = smp_processor_id(); - local_irq_save(flags); + raw_local_irq_save(flags); __raw_spin_lock(&kgdb_cpulock[cpu]); __raw_spin_unlock(&kgdb_cpulock[cpu]); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/mips/kernel/i8259.c 
=================================================================== --- linux.prev.orig/arch/mips/kernel/i8259.c +++ linux.prev/arch/mips/kernel/i8259.c @@ -31,7 +31,7 @@ void disable_8259A_irq(unsigned int irq) * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { Index: linux.prev/arch/mips/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/mips/kernel/init_task.c +++ linux.prev/arch/mips/kernel/init_task.c @@ -9,8 +9,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/mips/kernel/irq-rm7000.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq-rm7000.c +++ linux.prev/arch/mips/kernel/irq-rm7000.c @@ -33,18 +33,18 @@ static inline void rm7k_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_rm7k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm7k_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm7k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int rm7k_cpu_irq_startup(unsigned int irq) Index: linux.prev/arch/mips/kernel/irq-rm9000.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq-rm9000.c +++ linux.prev/arch/mips/kernel/irq-rm9000.c @@ -34,18 +34,18 @@ static inline void rm9k_cpu_irq_enable(u { unsigned long 
flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm9k_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int rm9k_cpu_irq_startup(unsigned int irq) @@ -79,9 +79,9 @@ static void local_rm9k_perfcounter_irq_s unsigned int irq = (unsigned int) args; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm9k_perfcounter_irq_shutdown(unsigned int irq) Index: linux.prev/arch/mips/kernel/irq.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq.c +++ linux.prev/arch/mips/kernel/irq.c @@ -125,7 +125,10 @@ void __init init_IRQ(void) irq_desc[i].action = NULL; irq_desc[i].depth = 1; irq_desc[i].handler = &no_irq_type; - spin_lock_init(&irq_desc[i].lock); + __raw_spin_lock_init(&irq_desc[i].lock); +#ifdef CONFIG_PREEMPT_HARDIRQS + irq_desc[i].thread = NULL; +#endif } arch_init_irq(); Index: linux.prev/arch/mips/kernel/irq_cpu.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq_cpu.c +++ linux.prev/arch/mips/kernel/irq_cpu.c @@ -54,20 +54,20 @@ static inline void mips_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_mips_irq(irq); back_to_back_c0_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void mips_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_mips_irq(irq); back_to_back_c0_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int mips_cpu_irq_startup(unsigned int irq) Index: 
linux.prev/arch/mips/kernel/module.c =================================================================== --- linux.prev.orig/arch/mips/kernel/module.c +++ linux.prev/arch/mips/kernel/module.c @@ -39,7 +39,7 @@ struct mips_hi16 { static struct mips_hi16 *mips_hi16_list; static LIST_HEAD(dbe_list); -static DEFINE_SPINLOCK(dbe_lock); +static DEFINE_RAW_SPINLOCK(dbe_lock); void *module_alloc(unsigned long size) { Index: linux.prev/arch/mips/kernel/process.c =================================================================== --- linux.prev.orig/arch/mips/kernel/process.c +++ linux.prev/arch/mips/kernel/process.c @@ -47,13 +47,15 @@ */ ATTRIB_NORET void cpu_idle(void) { + raw_local_irq_enable(); + /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) if (cpu_wait) (*cpu_wait)(); - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); } } Index: linux.prev/arch/mips/kernel/scall32-o32.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall32-o32.S +++ linux.prev/arch/mips/kernel/scall32-o32.S @@ -72,7 +72,7 @@ stack_done: 1: sw v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return lw a2, TI_FLAGS($28) # current->work Index: linux.prev/arch/mips/kernel/scall64-64.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall64-64.S +++ linux.prev/arch/mips/kernel/scall64-64.S @@ -71,7 +71,7 @@ NESTED(handle_sys64, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result n64_syscall_exit: - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: 
linux.prev/arch/mips/kernel/scall64-n32.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall64-n32.S +++ linux.prev/arch/mips/kernel/scall64-n32.S @@ -68,7 +68,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd v0, PT_R0(sp) # set flag for syscall restarting 1: sd v0, PT_R2(sp) # result - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: linux.prev/arch/mips/kernel/scall64-o32.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall64-o32.S +++ linux.prev/arch/mips/kernel/scall64-o32.S @@ -97,7 +97,7 @@ NESTED(handle_sys, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make need_resched and + raw_local_irq_disable # make need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) Index: linux.prev/arch/mips/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/mips/kernel/semaphore.c +++ linux.prev/arch/mips/kernel/semaphore.c @@ -36,7 +36,7 @@ * sem->count and sem->waking atomic. Scalability isn't an issue because * this lock is used on UP only so it's just an empty variable. 
*/ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -67,7 +67,7 @@ static inline int __sem_update_count(str : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (incr), "m" (sem->count)); } else { - static DEFINE_SPINLOCK(semaphore_lock); + static DEFINE_RAW_SPINLOCK(semaphore_lock); unsigned long flags; spin_lock_irqsave(&semaphore_lock, flags); @@ -80,7 +80,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -94,7 +94,7 @@ void __up(struct semaphore *sem) wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -104,7 +104,7 @@ EXPORT_SYMBOL(__up); * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. 
*/ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -133,9 +133,9 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -165,4 +165,4 @@ int __sched __down_interruptible(struct return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); Index: linux.prev/arch/mips/kernel/signal.c =================================================================== --- linux.prev.orig/arch/mips/kernel/signal.c +++ linux.prev/arch/mips/kernel/signal.c @@ -426,6 +426,10 @@ int do_signal(sigset_t *oldset, struct p siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux.prev/arch/mips/kernel/signal32.c =================================================================== --- linux.prev.orig/arch/mips/kernel/signal32.c +++ linux.prev/arch/mips/kernel/signal32.c @@ -814,6 +814,10 @@ int do_signal32(sigset_t *oldset, struct siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. 
Just return without doing anything Index: linux.prev/arch/mips/kernel/smp.c =================================================================== --- linux.prev.orig/arch/mips/kernel/smp.c +++ linux.prev/arch/mips/kernel/smp.c @@ -106,7 +106,22 @@ asmlinkage void start_secondary(void) cpu_idle(); } -DEFINE_SPINLOCK(smp_call_lock); +DEFINE_RAW_SPINLOCK(smp_call_lock); + +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them. + */ +void smp_send_reschedule_allbutself(void) +{ + int cpu = smp_processor_id(); + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i) && i != cpu) + core_send_ipi(i, SMP_RESCHEDULE_YOURSELF); +} struct call_data_struct *call_data; @@ -215,7 +230,7 @@ static void stop_this_cpu(void *dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_enable(); /* May need to service _machine_restart IPI */ + raw_local_irq_enable(); /* May need to service _machine_restart IPI */ for (;;); /* Wait if available. 
*/ } @@ -289,6 +304,8 @@ int setup_profiling_timer(unsigned int m return 0; } +static DEFINE_RAW_SPINLOCK(tlbstate_lock); + static void flush_tlb_all_ipi(void *info) { local_flush_tlb_all(); @@ -320,6 +337,7 @@ static void flush_tlb_mm_ipi(void *mm) void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); + spin_lock(&tlbstate_lock); if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1, 1); @@ -329,6 +347,7 @@ void flush_tlb_mm(struct mm_struct *mm) if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_mm(mm); preempt_enable(); @@ -352,6 +371,8 @@ void flush_tlb_range(struct vm_area_stru struct mm_struct *mm = vma->vm_mm; preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { struct flush_tlb_data fd; @@ -365,6 +386,7 @@ void flush_tlb_range(struct vm_area_stru if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_range(vma, start, end); preempt_enable(); } @@ -395,6 +417,8 @@ static void flush_tlb_page_ipi(void *inf void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { struct flush_tlb_data fd; @@ -407,6 +431,7 @@ void flush_tlb_page(struct vm_area_struc if (smp_processor_id() != i) cpu_context(i, vma->vm_mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_page(vma, page); preempt_enable(); } Index: linux.prev/arch/mips/kernel/time.c =================================================================== --- linux.prev.orig/arch/mips/kernel/time.c +++ linux.prev/arch/mips/kernel/time.c @@ -50,7 +50,7 @@ */ extern volatile unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); /* * By default we provide the null RTC ops @@ -554,7 +554,7 @@ unsigned int mips_hpt_frequency; 
static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_NODELAY | SA_INTERRUPT, .name = "timer", }; Index: linux.prev/arch/mips/kernel/traps.c =================================================================== --- linux.prev.orig/arch/mips/kernel/traps.c +++ linux.prev/arch/mips/kernel/traps.c @@ -274,7 +274,7 @@ void show_registers(struct pt_regs *regs printk("\n"); } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); NORET_TYPE void ATTRIB_NORET die(const char * str, struct pt_regs * regs) { Index: linux.prev/arch/mips/lasat/interrupt.c =================================================================== --- linux.prev.orig/arch/mips/lasat/interrupt.c +++ linux.prev/arch/mips/lasat/interrupt.c @@ -39,18 +39,18 @@ void disable_lasat_irq(unsigned int irq_ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *lasat_int_mask &= ~(1 << irq_nr) << lasat_int_mask_shift; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void enable_lasat_irq(unsigned int irq_nr) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *lasat_int_mask |= (1 << irq_nr) << lasat_int_mask_shift; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_lasat_irq(unsigned int irq) Index: linux.prev/arch/mips/lasat/reset.c =================================================================== --- linux.prev.orig/arch/mips/lasat/reset.c +++ linux.prev/arch/mips/lasat/reset.c @@ -33,7 +33,7 @@ int lasat_boot_to_service = 0; static void lasat_machine_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); if (lasat_boot_to_service) { printk("machine_restart: Rebooting to service mode\n"); @@ -47,7 +47,7 @@ static void lasat_machine_restart(char * #define MESSAGE "System halted" static void lasat_machine_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); /* Disable interrupts and loop forever */ 
printk(KERN_NOTICE MESSAGE "\n"); Index: linux.prev/arch/mips/lib-32/dump_tlb.c =================================================================== --- linux.prev.orig/arch/mips/lib-32/dump_tlb.c +++ linux.prev/arch/mips/lib-32/dump_tlb.c @@ -118,7 +118,7 @@ void dump_tlb_addr(unsigned long addr) unsigned int flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; BARRIER(); write_c0_entryhi((addr & PAGE_MASK) | oldpid); @@ -127,7 +127,7 @@ void dump_tlb_addr(unsigned long addr) BARRIER(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux.prev/arch/mips/lib-32/r3k_dump_tlb.c =================================================================== --- linux.prev.orig/arch/mips/lib-32/r3k_dump_tlb.c +++ linux.prev/arch/mips/lib-32/r3k_dump_tlb.c @@ -79,13 +79,13 @@ void dump_tlb_addr(unsigned long addr) unsigned long flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; write_c0_entryhi((addr & PAGE_MASK) | oldpid); tlb_probe(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux.prev/arch/mips/lib-64/dump_tlb.c =================================================================== --- linux.prev.orig/arch/mips/lib-64/dump_tlb.c +++ linux.prev/arch/mips/lib-64/dump_tlb.c @@ -112,7 +112,7 @@ void dump_tlb_addr(unsigned long addr) unsigned int flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; BARRIER(); write_c0_entryhi((addr & PAGE_MASK) | oldpid); @@ -121,7 +121,7 @@ void dump_tlb_addr(unsigned long addr) BARRIER(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + 
raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux.prev/arch/mips/math-emu/cp1emu.c =================================================================== --- linux.prev.orig/arch/mips/math-emu/cp1emu.c +++ linux.prev/arch/mips/math-emu/cp1emu.c @@ -1269,7 +1269,9 @@ int fpu_emulator_cop1Handler(struct pt_r if (sig) break; + preempt_enable(); cond_resched(); + preempt_disable(); } while (xcp->cp0_epc > prevepc); /* SIGILL indicates a non-fpu instruction */ Index: linux.prev/arch/mips/mips-boards/generic/time.c =================================================================== --- linux.prev.orig/arch/mips/mips-boards/generic/time.c +++ linux.prev/arch/mips/mips-boards/generic/time.c @@ -139,7 +139,7 @@ static unsigned int __init estimate_cpu_ #if defined(CONFIG_MIPS_ATLAS) || defined(CONFIG_MIPS_MALTA) unsigned int flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Start counter exactly on falling edge of update flag */ while (CMOS_READ(RTC_REG_A) & RTC_UIP); @@ -155,7 +155,7 @@ static unsigned int __init estimate_cpu_ count = read_c0_count(); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); #endif mips_hpt_frequency = count; @@ -178,7 +178,7 @@ void __init mips_time_init(void) { unsigned int est_freq, flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Set Data mode - binary. 
*/ CMOS_WRITE(CMOS_READ(RTC_CONTROL) | RTC_DM_BINARY, RTC_CONTROL); @@ -190,7 +190,7 @@ void __init mips_time_init(void) cpu_khz = est_freq / 1000; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init mips_timer_setup(struct irqaction *irq) Index: linux.prev/arch/mips/mm/c-r4k.c =================================================================== --- linux.prev.orig/arch/mips/mm/c-r4k.c +++ linux.prev/arch/mips/mm/c-r4k.c @@ -117,9 +117,9 @@ static inline void blast_r4600_v1_icache { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_icache32(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx49_blast_icache32(void) @@ -147,9 +147,9 @@ static inline void blast_icache32_r4600_ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_icache32_page_indexed(page); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx49_blast_icache32_page_indexed(unsigned long page) @@ -1090,7 +1090,7 @@ static int __init probe_scache(void) * This is such a bitch, you'd think they would make it easy to do * this. Away you daemons of stupidity! */ - local_irq_save(flags); + raw_local_irq_save(flags); /* Fill each size-multiple cache line with a valid tag. 
*/ pow2 = (64 * 1024); @@ -1118,7 +1118,7 @@ static int __init probe_scache(void) break; pow2 <<= 1; } - local_irq_restore(flags); + raw_local_irq_restore(flags); addr -= begin; scache_size = addr; Index: linux.prev/arch/mips/mm/c-tx39.c =================================================================== --- linux.prev.orig/arch/mips/mm/c-tx39.c +++ linux.prev/arch/mips/mm/c-tx39.c @@ -49,7 +49,7 @@ static void tx39h_flush_icache_all(void) unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); @@ -61,7 +61,7 @@ static void tx39h_flush_icache_all(void) } write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) @@ -104,39 +104,39 @@ static inline void tx39_blast_icache_pag { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16_page(addr); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx39_blast_icache_page_indexed(unsigned long addr) { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16_page_indexed(addr); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx39_blast_icache(void) { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16(); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } 
static inline void tx39_flush_cache_all(void) @@ -266,7 +266,7 @@ static void tx39_flush_icache_range(unsi addr = start & ~(dc_lsize - 1); aend = (end - 1) & ~(dc_lsize - 1); /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); @@ -278,7 +278,7 @@ static void tx39_flush_icache_range(unsi addr += dc_lsize; } write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -367,13 +367,13 @@ static void tx39_flush_cache_sigtramp(un protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); protected_flush_icache_line(addr & ~(ic_lsize - 1)); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static __init void tx39_probe_cache(void) Index: linux.prev/arch/mips/mm/init.c =================================================================== --- linux.prev.orig/arch/mips/mm/init.c +++ linux.prev/arch/mips/mm/init.c @@ -35,7 +35,7 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; Index: linux.prev/arch/mips/mm/sc-ip22.c =================================================================== --- linux.prev.orig/arch/mips/mm/sc-ip22.c +++ linux.prev/arch/mips/mm/sc-ip22.c @@ -72,7 +72,7 @@ static void indy_sc_wback_invalidate(uns first_line = SC_INDEX(addr); last_line = SC_INDEX(addr + size - 1); - local_irq_save(flags); + raw_local_irq_save(flags); if (first_line <= last_line) { indy_sc_wipe(first_line, last_line); goto out; @@ -81,7 +81,7 @@ static void indy_sc_wback_invalidate(uns indy_sc_wipe(first_line, SC_SIZE - SC_LINE); indy_sc_wipe(0, last_line); out: - local_irq_restore(flags); + 
raw_local_irq_restore(flags); } static void indy_sc_enable(void) Index: linux.prev/arch/mips/mm/sc-r5k.c =================================================================== --- linux.prev.orig/arch/mips/mm/sc-r5k.c +++ linux.prev/arch/mips/mm/sc-r5k.c @@ -61,20 +61,20 @@ static void r5k_sc_enable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); set_c0_config(R5K_CONF_SE); blast_r5000_scache(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void r5k_sc_disable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_r5000_scache(); clear_c0_config(R5K_CONF_SE); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline int __init r5k_sc_probe(void) Index: linux.prev/arch/mips/mm/tlb-andes.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-andes.c +++ linux.prev/arch/mips/mm/tlb-andes.c @@ -27,7 +27,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; unsigned long entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(CKSEG0); @@ -43,7 +43,7 @@ void local_flush_tlb_all(void) entry++; } write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -64,7 +64,7 @@ void local_flush_tlb_range(struct vm_are unsigned long flags; int size; - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; if (size <= NTLB_ENTRIES_HALF) { @@ -93,7 +93,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -105,7 +105,7 @@ void local_flush_tlb_kernel_range(unsign size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - 
local_irq_save(flags); + raw_local_irq_save(flags); if (size <= NTLB_ENTRIES_HALF) { int pid = read_c0_entryhi(); @@ -131,7 +131,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -143,7 +143,7 @@ void local_flush_tlb_page(struct vm_area newpid = (cpu_context(smp_processor_id(), vma->vm_mm) & ASID_MASK); page &= (PAGE_MASK << 1); - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = (read_c0_entryhi() & ASID_MASK); write_c0_entryhi(page | newpid); tlb_probe(); @@ -157,7 +157,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -170,7 +170,7 @@ void local_flush_tlb_one(unsigned long p unsigned long flags; int oldpid, idx; - local_irq_save(flags); + raw_local_irq_save(flags); page &= (PAGE_MASK << 1); oldpid = read_c0_entryhi() & 0xff; write_c0_entryhi(page); @@ -185,7 +185,7 @@ void local_flush_tlb_one(unsigned long p } write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* XXX Simplify this. 
On the R10000 writing a TLB entry for an virtual @@ -216,7 +216,7 @@ void __update_tlb(struct vm_area_struct vma->vm_mm) & ASID_MASK), pid); } - local_irq_save(flags); + raw_local_irq_save(flags); address &= (PAGE_MASK << 1); write_c0_entryhi(address | (pid)); pgdp = pgd_offset(vma->vm_mm, address); @@ -234,7 +234,7 @@ void __update_tlb(struct vm_area_struct tlb_write_indexed(); } write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init tlb_init(void) Index: linux.prev/arch/mips/mm/tlb-r3k.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-r3k.c +++ linux.prev/arch/mips/mm/tlb-r3k.c @@ -49,7 +49,7 @@ void local_flush_tlb_all(void) printk("[tlball]"); #endif - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(0); entry = r3k_have_wired_reg ? read_c0_wired() : 8; @@ -60,7 +60,7 @@ void local_flush_tlb_all(void) tlb_write_indexed(); } write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -89,7 +89,7 @@ void local_flush_tlb_range(struct vm_are printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", cpu_context(cpu, mm) & ASID_MASK, start, end); #endif - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size <= current_cpu_data.tlbsize) { int oldpid = read_c0_entryhi() & ASID_MASK; @@ -115,7 +115,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -127,7 +127,7 @@ void local_flush_tlb_kernel_range(unsign #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end); #endif - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size <= current_cpu_data.tlbsize) { int pid = read_c0_entryhi(); @@ -153,7 +153,7 @@ void 
local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -169,7 +169,7 @@ void local_flush_tlb_page(struct vm_area #endif newpid = cpu_context(cpu, vma->vm_mm) & ASID_MASK; page &= PAGE_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(page | newpid); BARRIER; @@ -183,7 +183,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -207,7 +207,7 @@ void __update_tlb(struct vm_area_struct } #endif - local_irq_save(flags); + raw_local_irq_save(flags); address &= PAGE_MASK; write_c0_entryhi(address | pid); BARRIER; @@ -221,7 +221,7 @@ void __update_tlb(struct vm_area_struct tlb_write_indexed(); } write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, @@ -240,7 +240,7 @@ void __init add_wired_entry(unsigned lon entrylo0, entryhi, pagemask); #endif - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi() & ASID_MASK; old_pagemask = read_c0_pagemask(); @@ -260,7 +260,7 @@ void __init add_wired_entry(unsigned lon write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } else if (wired < 8) { #ifdef DEBUG_TLB @@ -268,7 +268,7 @@ void __init add_wired_entry(unsigned lon entrylo0, entryhi); #endif - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(entrylo0); write_c0_entryhi(entryhi); @@ -277,7 +277,7 @@ void __init add_wired_entry(unsigned lon tlb_write_indexed(); write_c0_entryhi(old_ctx); local_flush_tlb_all(); - local_irq_restore(flags); + 
raw_local_irq_restore(flags); } } Index: linux.prev/arch/mips/mm/tlb-r4k.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-r4k.c +++ linux.prev/arch/mips/mm/tlb-r4k.c @@ -38,7 +38,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); write_c0_entrylo0(0); @@ -57,7 +57,7 @@ void local_flush_tlb_all(void) } tlbw_use_hazard(); write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* All entries common to a mm share an asid. To effectively flush @@ -89,7 +89,7 @@ void local_flush_tlb_range(struct vm_are size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size <= current_cpu_data.tlbsize/2) { int oldpid = read_c0_entryhi(); int newpid = cpu_asid(cpu, mm); @@ -120,7 +120,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -131,7 +131,7 @@ void local_flush_tlb_kernel_range(unsign size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size <= current_cpu_data.tlbsize / 2) { int pid = read_c0_entryhi(); @@ -162,7 +162,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -175,7 +175,7 @@ void local_flush_tlb_page(struct vm_area newpid = cpu_asid(cpu, vma->vm_mm); page &= (PAGE_MASK << 1); - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); write_c0_entryhi(page | newpid); mtc0_tlbw_hazard(); @@ -194,7 +194,7 @@ void local_flush_tlb_page(struct vm_area finish: 
write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -207,7 +207,7 @@ void local_flush_tlb_one(unsigned long p unsigned long flags; int oldpid, idx; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); page &= (PAGE_MASK << 1); write_c0_entryhi(page); @@ -226,7 +226,7 @@ void local_flush_tlb_one(unsigned long p } write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -249,7 +249,7 @@ void __update_tlb(struct vm_area_struct if (current->active_mm != vma->vm_mm) return; - local_irq_save(flags); + raw_local_irq_save(flags); pid = read_c0_entryhi() & ASID_MASK; address &= (PAGE_MASK << 1); @@ -277,7 +277,7 @@ void __update_tlb(struct vm_area_struct else tlb_write_indexed(); tlbw_use_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #if 0 @@ -291,7 +291,7 @@ static void r4k_update_mmu_cache_hwbug(s pte_t *ptep; int idx; - local_irq_save(flags); + raw_local_irq_save(flags); address &= (PAGE_MASK << 1); asid = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(address | asid); @@ -310,7 +310,7 @@ static void r4k_update_mmu_cache_hwbug(s else tlb_write_indexed(); tlbw_use_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -322,7 +322,7 @@ void __init add_wired_entry(unsigned lon unsigned long old_pagemask; unsigned long old_ctx; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); @@ -342,7 +342,7 @@ void __init add_wired_entry(unsigned lon BARRIER; write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -362,7 +362,7 @@ __init int add_temporary_entry(unsigned unsigned long old_pagemask; unsigned long old_ctx; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = 
read_c0_entryhi(); old_pagemask = read_c0_pagemask(); @@ -386,7 +386,7 @@ __init int add_temporary_entry(unsigned write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); out: - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } Index: linux.prev/arch/mips/mm/tlb-r8k.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-r8k.c +++ linux.prev/arch/mips/mm/tlb-r8k.c @@ -35,7 +35,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); write_c0_entrylo(0); @@ -49,7 +49,7 @@ void local_flush_tlb_all(void) } tlbw_use_hazard(); write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -74,7 +74,7 @@ void local_flush_tlb_range(struct vm_are size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size > TFP_TLB_SIZE / 2) { drop_mmu_context(mm, cpu); @@ -106,7 +106,7 @@ void local_flush_tlb_range(struct vm_are write_c0_entryhi(oldpid); out_restore: - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Usable for KV1 addresses only! 
*/ @@ -123,7 +123,7 @@ void local_flush_tlb_kernel_range(unsign return; } - local_irq_save(flags); + raw_local_irq_save(flags); write_c0_entrylo(0); @@ -145,7 +145,7 @@ void local_flush_tlb_kernel_range(unsign tlb_write(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -160,7 +160,7 @@ void local_flush_tlb_page(struct vm_area newpid = cpu_asid(cpu, vma->vm_mm); page &= PAGE_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); write_c0_vaddr(page); write_c0_entryhi(newpid); @@ -175,7 +175,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -199,7 +199,7 @@ void __update_tlb(struct vm_area_struct pid = read_c0_entryhi() & ASID_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); address &= PAGE_MASK; write_c0_vaddr(address); write_c0_entryhi(pid); @@ -212,7 +212,7 @@ void __update_tlb(struct vm_area_struct tlb_write(); write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void __init probe_tlb(unsigned long config) Index: linux.prev/arch/mips/momentum/ocelot_g/irq.c =================================================================== --- linux.prev.orig/arch/mips/momentum/ocelot_g/irq.c +++ linux.prev/arch/mips/momentum/ocelot_g/irq.c @@ -58,7 +58,7 @@ void __init arch_init_irq(void) * int-handler is not on bootstrap */ clear_c0_status(ST0_IM); - local_irq_disable(); + raw_local_irq_disable(); /* Sets the first-level interrupt dispatcher. 
*/ set_except_vector(0, ocelot_handle_int); Index: linux.prev/arch/mips/pci/ops-au1000.c =================================================================== --- linux.prev.orig/arch/mips/pci/ops-au1000.c +++ linux.prev/arch/mips/pci/ops-au1000.c @@ -93,7 +93,7 @@ static int config_access(unsigned char a return -1; } - local_irq_save(flags); + raw_local_irq_save(flags); au_writel(((0x2000 << 16) | (au_readl(Au1500_PCI_STATCMD) & 0xffff)), Au1500_PCI_STATCMD); au_sync_udelay(1); @@ -125,7 +125,7 @@ static int config_access(unsigned char a if (board_pci_idsel) { if (board_pci_idsel(device, 1) == 0) { *data = 0xffffffff; - local_irq_restore(flags); + raw_local_irq_restore(flags); return -1; } } @@ -184,7 +184,7 @@ static int config_access(unsigned char a (void)board_pci_idsel(device, 0); } - local_irq_restore(flags); + raw_local_irq_restore(flags); return error; #endif } Index: linux.prev/arch/mips/pmc-sierra/yosemite/smp.c =================================================================== --- linux.prev.orig/arch/mips/pmc-sierra/yosemite/smp.c +++ linux.prev/arch/mips/pmc-sierra/yosemite/smp.c @@ -19,7 +19,7 @@ static unsigned char launchstack[LAUNCHS static void __init prom_smp_bootstrap(void) { - local_irq_disable(); + raw_local_irq_disable(); while (spin_is_locked(&launch_lock)); Index: linux.prev/arch/mips/sgi-ip22/ip22-eisa.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip22/ip22-eisa.c +++ linux.prev/arch/mips/sgi-ip22/ip22-eisa.c @@ -98,13 +98,13 @@ static void enable_eisa1_irq(unsigned in unsigned long flags; u8 mask; - local_irq_save(flags); + raw_local_irq_save(flags); mask = inb(EISA_INT1_MASK); mask &= ~((u8) (1 << irq)); outb(mask, EISA_INT1_MASK); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_eisa1_irq(unsigned int irq) @@ -160,13 +160,13 @@ static void enable_eisa2_irq(unsigned in unsigned long flags; u8 mask; - local_irq_save(flags); + 
raw_local_irq_save(flags); mask = inb(EISA_INT2_MASK); mask &= ~((u8) (1 << (irq - 8))); outb(mask, EISA_INT2_MASK); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_eisa2_irq(unsigned int irq) Index: linux.prev/arch/mips/sgi-ip22/ip22-int.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip22/ip22-int.c +++ linux.prev/arch/mips/sgi-ip22/ip22-int.c @@ -44,12 +44,12 @@ static void enable_local0_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* don't allow mappable interrupt to be enabled from setup_irq, * we have our own way to do so */ if (irq != SGI_MAP_0_IRQ) sgint->imask0 |= (1 << (irq - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local0_irq(unsigned int irq) @@ -62,9 +62,9 @@ static void disable_local0_irq(unsigned { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask0 &= ~(1 << (irq - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local0_irq disable_local0_irq @@ -90,12 +90,12 @@ static void enable_local1_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* don't allow mappable interrupt to be enabled from setup_irq, * we have our own way to do so */ if (irq != SGI_MAP_1_IRQ) sgint->imask1 |= (1 << (irq - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local1_irq(unsigned int irq) @@ -108,9 +108,9 @@ void disable_local1_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask1 &= ~(1 << (irq - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local1_irq disable_local1_irq @@ -136,10 +136,10 @@ static void enable_local2_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + 
raw_local_irq_save(flags); sgint->imask0 |= (1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0)); sgint->cmeimask0 |= (1 << (irq - SGINT_LOCAL2)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local2_irq(unsigned int irq) @@ -152,11 +152,11 @@ void disable_local2_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->cmeimask0 &= ~(1 << (irq - SGINT_LOCAL2)); if (!sgint->cmeimask0) sgint->imask0 &= ~(1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local2_irq disable_local2_irq @@ -182,10 +182,10 @@ static void enable_local3_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask1 |= (1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1)); sgint->cmeimask1 |= (1 << (irq - SGINT_LOCAL3)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local3_irq(unsigned int irq) @@ -198,11 +198,11 @@ void disable_local3_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->cmeimask1 &= ~(1 << (irq - SGINT_LOCAL3)); if (!sgint->cmeimask1) sgint->imask1 &= ~(1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local3_irq disable_local3_irq Index: linux.prev/arch/mips/sgi-ip22/ip22-reset.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip22/ip22-reset.c +++ linux.prev/arch/mips/sgi-ip22/ip22-reset.c @@ -66,7 +66,7 @@ static void sgi_machine_power_off(void) { unsigned int tmp; - local_irq_disable(); + raw_local_irq_disable(); /* Disable watchdog */ tmp = hpc3c0->rtcregs[RTC_CMD] & 0xff; Index: linux.prev/arch/mips/sgi-ip27/ip27-smp.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip27/ip27-smp.c +++ linux.prev/arch/mips/sgi-ip27/ip27-smp.c @@ 
-179,7 +179,7 @@ void __init prom_boot_secondary(int cpu, void prom_init_secondary(void) { per_cpu_init(); - local_irq_enable(); + raw_local_irq_enable(); } void __init prom_cpus_done(void) Index: linux.prev/arch/mips/sibyte/sb1250/irq.c =================================================================== --- linux.prev.orig/arch/mips/sibyte/sb1250/irq.c +++ linux.prev/arch/mips/sibyte/sb1250/irq.c @@ -86,7 +86,7 @@ static struct hw_interrupt_type sb1250_i /* Store the CPU id (not the logical number) */ int sb1250_irq_owner[SB1250_NR_IRQS]; -DEFINE_SPINLOCK(sb1250_imr_lock); +DEFINE_RAW_SPINLOCK(sb1250_imr_lock); void sb1250_mask_irq(int cpu, int irq) { @@ -267,7 +267,7 @@ static irqreturn_t sb1250_dummy_handler static struct irqaction sb1250_dummy_action = { .handler = sb1250_dummy_handler, - .flags = 0, + .flags = SA_NODELAY, .mask = CPU_MASK_NONE, .name = "sb1250-private", .next = NULL, Index: linux.prev/arch/mips/sibyte/sb1250/smp.c =================================================================== --- linux.prev.orig/arch/mips/sibyte/sb1250/smp.c +++ linux.prev/arch/mips/sibyte/sb1250/smp.c @@ -59,7 +59,7 @@ void sb1250_smp_finish(void) { extern void sb1250_time_init(void); sb1250_time_init(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux.prev/arch/mips/sni/reset.c =================================================================== --- linux.prev.orig/arch/mips/sni/reset.c +++ linux.prev/arch/mips/sni/reset.c @@ -30,7 +30,7 @@ void sni_machine_restart(char *command) /* This does a normal via the keyboard controller like a PC. We can do that easier ... 
*/ - local_irq_disable(); + raw_local_irq_disable(); for (;;) { for (i=0; i<100; i++) { kb_wait(); Index: linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c =================================================================== --- linux.prev.orig/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c +++ linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c @@ -669,7 +669,7 @@ void __init arch_init_irq(void) { extern void tx4927_irq_init(void); - local_irq_disable(); + raw_local_irq_disable(); tx4927_irq_init(); toshiba_rbtx4927_irq_ioc_init(); Index: linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c =================================================================== --- linux.prev.orig/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c +++ linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c @@ -732,7 +732,7 @@ void toshiba_rbtx4927_restart(char *comm reg_wr08(RBTX4927_SW_RESET_DO, RBTX4927_SW_RESET_DO_SET); /* do something passive while waiting for reset */ - local_irq_disable(); + raw_local_irq_disable(); while (1) asm_wait(); @@ -743,7 +743,7 @@ void toshiba_rbtx4927_restart(char *comm void toshiba_rbtx4927_halt(void) { printk(KERN_NOTICE "System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) { asm_wait(); } Index: linux.prev/arch/mips/vr41xx/common/pmu.c =================================================================== --- linux.prev.orig/arch/mips/vr41xx/common/pmu.c +++ linux.prev/arch/mips/vr41xx/common/pmu.c @@ -62,7 +62,7 @@ static inline void software_reset(void) static void vr41xx_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); software_reset(); printk(KERN_NOTICE "\nYou can reset your system\n"); while (1) ; @@ -70,14 +70,14 @@ static void vr41xx_restart(char *command static void vr41xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); printk(KERN_NOTICE "\nYou can turn off the power supply\n"); while (1) ; } 
static void vr41xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); printk(KERN_NOTICE "\nYou can turn off the power supply\n"); while (1) ; } Index: linux.prev/arch/powerpc/Kconfig =================================================================== --- linux.prev.orig/arch/powerpc/Kconfig +++ linux.prev/arch/powerpc/Kconfig @@ -33,13 +33,6 @@ config GENERIC_HARDIRQS bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -484,6 +477,18 @@ config HIGHMEM source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "fs/Kconfig.binfmt" # We optimistically allocate largepages from the VM, so make the limit Index: linux.prev/arch/powerpc/boot/Makefile =================================================================== --- linux.prev.orig/arch/powerpc/boot/Makefile +++ linux.prev/arch/powerpc/boot/Makefile @@ -28,6 +28,14 @@ BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAG BOOTLFLAGS := -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h zliblinuxheader := zlib.h zconf.h zutil.h @@ -43,7 +51,7 @@ obj-boot := $(addsuffix .o, $(basename $ BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) quiet_cmd_copy_zlib = COPY $@ - cmd_copy_zlib = sed "s@__attribute_used__@@;s@]\+\).*@\"\1\"@" $< > $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@.include.@@;s@.include.@@;s@.*spin.*lock.*@@;s@.*SPINLOCK.*@@;s@]\+\).*@\"\1\"@" $< > $@ quiet_cmd_copy_zlibheader = COPY 
$@ cmd_copy_zlibheader = sed "s@]\+\).*@\"\1\"@" $< > $@ Index: linux.prev/arch/powerpc/kernel/Makefile =================================================================== --- linux.prev.orig/arch/powerpc/kernel/Makefile +++ linux.prev/arch/powerpc/kernel/Makefile @@ -11,9 +11,10 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif -obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ +obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o obj-y += vdso32/ +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o systbl.o \ paca.o ioctl32.o cpu_setup_power4.o \ Index: linux.prev/arch/powerpc/kernel/entry_32.S =================================================================== --- linux.prev.orig/arch/powerpc/kernel/entry_32.S +++ linux.prev/arch/powerpc/kernel/entry_32.S @@ -239,7 +239,7 @@ ret_from_syscall: SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- syscall_exit_work syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -317,7 +317,7 @@ syscall_exit_work: rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ lwz r9,TI_FLAGS(r12) 5: - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1f lwz r5,_MSR(r1) andi. r5,r5,MSR_PR @@ -658,7 +658,7 @@ user_exc_return: /* r10 contains MSR_KE /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -876,7 +876,7 @@ load_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. 
r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -998,3 +998,85 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_MCOUNT +/* + * mcount() is not the same as _mcount(). The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. + */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function + * preamble, before the stack frame is created. An example of this preamble + * code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. 
*/ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ Index: linux.prev/arch/powerpc/kernel/idle_64.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/idle_64.c +++ linux.prev/arch/powerpc/kernel/idle_64.c @@ -37,7 +37,7 @@ void default_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { while (!need_resched() && !cpu_is_offline(cpu)) { ppc64_runlatch_off(); @@ -53,9 +53,11 @@ void default_idle(void) } ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -71,9 +73,11 @@ void native_idle(void) if (need_resched()) { ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } if (cpu_is_offline(smp_processor_id()) && Index: linux.prev/arch/powerpc/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/init_task.c +++ 
linux.prev/arch/powerpc/kernel/init_task.c @@ -3,12 +3,12 @@ #include #include #include -#include +#include #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/powerpc/kernel/irq.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/irq.c +++ linux.prev/arch/powerpc/kernel/irq.c @@ -100,8 +100,6 @@ extern atomic_t ipi_sent; #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 -EXPORT_SYMBOL(irq_desc); - int distribute_irqs = 1; u64 ppc64_interrupt_controller; #endif /* CONFIG_PPC64 */ Index: linux.prev/arch/powerpc/kernel/ppc_ksyms.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/ppc_ksyms.c +++ linux.prev/arch/powerpc/kernel/ppc_ksyms.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -221,16 +220,11 @@ EXPORT_SYMBOL(screen_info); #ifdef CONFIG_PPC32 EXPORT_SYMBOL(__delay); EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(cacheable_memcpy); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); - #ifdef CONFIG_8xx EXPORT_SYMBOL(cpm_install_handler); EXPORT_SYMBOL(cpm_free_handler); Index: linux.prev/arch/powerpc/kernel/process.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/process.c +++ linux.prev/arch/powerpc/kernel/process.c @@ -327,10 +327,10 @@ struct task_struct *__switch_to(struct t } #endif - local_irq_save(flags); + raw_local_irq_save(flags); last = 
_switch(old_thread, new_thread); - local_irq_restore(flags); + raw_local_irq_restore(flags); return last; } Index: linux.prev/arch/powerpc/kernel/rtas.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/rtas.c +++ linux.prev/arch/powerpc/kernel/rtas.c @@ -31,7 +31,7 @@ #include struct rtas_t rtas = { - .lock = SPIN_LOCK_UNLOCKED + .lock = SPIN_LOCK_UNLOCKED(rtas.lock) }; EXPORT_SYMBOL(rtas); @@ -620,7 +620,7 @@ void rtas_stop_self(void) { struct rtas_args *rtas_args = &rtas_stop_self_args; - local_irq_disable(); + raw_local_irq_disable(); BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE); Index: linux.prev/arch/powerpc/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/semaphore.c +++ linux.prev/arch/powerpc/kernel/semaphore.c @@ -31,7 +31,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -50,7 +50,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -63,7 +63,7 @@ void __up(struct semaphore *sem) __sem_update_count(sem, 1); wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -73,7 +73,7 @@ EXPORT_SYMBOL(__up); * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. 
*/ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -101,9 +101,9 @@ void __sched __down(struct semaphore *se */ wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore *sem) { int retval = 0; struct task_struct *tsk = current; @@ -132,4 +132,10 @@ int __sched __down_interruptible(struct wake_up(&sem->wait); return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} +EXPORT_SYMBOL(compat_sem_is_locked); Index: linux.prev/arch/powerpc/kernel/setup-common.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/setup-common.c +++ linux.prev/arch/powerpc/kernel/setup-common.c @@ -105,7 +105,7 @@ void machine_restart(char *cmd) smp_send_stop(); #endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -117,7 +117,7 @@ void machine_power_off(void) smp_send_stop(); #endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } /* Used by the G5 thermal driver */ @@ -134,7 +134,7 @@ void machine_halt(void) smp_send_stop(); #endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux.prev/arch/powerpc/kernel/smp-tbsync.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/smp-tbsync.c +++ linux.prev/arch/powerpc/kernel/smp-tbsync.c @@ -47,7 +47,7 @@ void __devinit smp_generic_take_timebase int cmd; u64 tb; - local_irq_disable(); + 
raw_local_irq_disable(); while (!running) barrier(); rmb(); @@ -71,7 +71,7 @@ void __devinit smp_generic_take_timebase set_tb(tb >> 32, tb & 0xfffffffful); enter_contest(tbsync->mark, -1); } - local_irq_enable(); + raw_local_irq_enable(); } static int __devinit start_contest(int cmd, long offset, int num) @@ -82,7 +82,7 @@ static int __devinit start_contest(int c tbsync->cmd = cmd; - local_irq_disable(); + raw_local_irq_disable(); for (i = -3; i < num; ) { tb = get_tb() + 400; tbsync->tb = tb + offset; @@ -105,7 +105,7 @@ static int __devinit start_contest(int c if (i++ > 0) score += tbsync->race_result; } - local_irq_enable(); + raw_local_irq_enable(); return score; } Index: linux.prev/arch/powerpc/kernel/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/smp.c +++ linux.prev/arch/powerpc/kernel/smp.c @@ -140,6 +140,16 @@ void smp_send_reschedule(int cpu) smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE); +} + #ifdef CONFIG_DEBUGGER void smp_send_debugger_break(int cpu) { @@ -149,7 +159,7 @@ void smp_send_debugger_break(int cpu) static void stop_this_cpu(void *dummy) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -164,7 +174,7 @@ void smp_send_stop(void) * static memory requirements. It also looks cleaner. * Stolen from the i386 version. 
*/ -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); @@ -200,7 +210,7 @@ int smp_call_function (void (*func) (voi u64 timeout; /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); data.func = func; data.info = info; @@ -529,7 +539,7 @@ int __devinit start_secondary(void *unus cpu_set(cpu, cpu_online_map); spin_unlock(&call_lock); - local_irq_enable(); + raw_local_irq_enable(); cpu_idle(); return 0; Index: linux.prev/arch/powerpc/kernel/time.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/time.c +++ linux.prev/arch/powerpc/kernel/time.c @@ -72,6 +72,9 @@ #endif #include +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); + /* keep track of when we need to update the rtc */ time_t last_rtc_update; extern int piranha_simulator; @@ -100,7 +103,7 @@ unsigned long tb_ticks_per_sec; u64 tb_to_xs; unsigned tb_to_us; unsigned long processor_freq; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); u64 tb_to_ns_scale; @@ -335,7 +338,7 @@ static __inline__ void timer_recalc_offs } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -698,6 +701,7 @@ void __init time_init(void) tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); div128_by_32(1024*1024, 0, tb_ticks_per_sec, &res); tb_to_xs = res.result_low; + cpu_khz = ppc_tb_freq / 1000; #ifdef CONFIG_PPC64 get_paca()->default_decr = tb_ticks_per_jiffy; Index: linux.prev/arch/powerpc/kernel/traps.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/traps.c +++ linux.prev/arch/powerpc/kernel/traps.c @@ -91,7 +91,7 @@ int 
register_die_notifier(struct notifie * Trap & Exception support */ -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { @@ -182,6 +182,11 @@ void _exception(int signr, struct pt_reg return; } +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; Index: linux.prev/arch/powerpc/lib/locks.c =================================================================== --- linux.prev.orig/arch/powerpc/lib/locks.c +++ linux.prev/arch/powerpc/lib/locks.c @@ -25,7 +25,7 @@ #include #include -void __spin_yield(raw_spinlock_t *lock) +void __spin_yield(__raw_spinlock_t *lock) { unsigned int lock_value, holder_cpu, yield_count; struct paca_struct *holder_paca; @@ -84,7 +84,7 @@ void __rw_yield(raw_rwlock_t *rw) } #endif -void __raw_spin_unlock_wait(raw_spinlock_t *lock) +void __raw_spin_unlock_wait(__raw_spinlock_t *lock) { while (lock->slock) { HMT_low(); Index: linux.prev/arch/powerpc/mm/fault.c =================================================================== --- linux.prev.orig/arch/powerpc/mm/fault.c +++ linux.prev/arch/powerpc/mm/fault.c @@ -117,8 +117,8 @@ static void do_dabr(struct pt_regs *regs * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. 
*/ -int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +int __kprobes notrace do_page_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; Index: linux.prev/arch/powerpc/mm/init_32.c =================================================================== --- linux.prev.orig/arch/powerpc/mm/init_32.c +++ linux.prev/arch/powerpc/mm/init_32.c @@ -57,7 +57,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; Index: linux.prev/arch/powerpc/mm/tlb_64.c =================================================================== --- linux.prev.orig/arch/powerpc/mm/tlb_64.c +++ linux.prev/arch/powerpc/mm/tlb_64.c @@ -38,7 +38,7 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, p /* This is declared as we are using the more or less generic * include/asm-ppc64/tlb.h file -- tgall */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); unsigned long pte_freelist_forced_free; Index: linux.prev/arch/powerpc/platforms/cell/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/cell/smp.c +++ linux.prev/arch/powerpc/platforms/cell/smp.c @@ -134,7 +134,7 @@ static void __devinit smp_iic_setup_cpu( iic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static void __devinit cell_give_timebase(void) Index: linux.prev/arch/powerpc/platforms/chrp/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/chrp/smp.c +++ linux.prev/arch/powerpc/platforms/chrp/smp.c @@ -47,7 +47,7 @@ static void 
__devinit smp_chrp_setup_cpu mpic_setup_this_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit smp_chrp_give_timebase(void) Index: linux.prev/arch/powerpc/platforms/chrp/time.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/chrp/time.c +++ linux.prev/arch/powerpc/platforms/chrp/time.c @@ -28,7 +28,7 @@ #include #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; static int nvram_as1 = NVRAM_AS1; static int nvram_as0 = NVRAM_AS0; Index: linux.prev/arch/powerpc/platforms/iseries/setup.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/iseries/setup.c +++ linux.prev/arch/powerpc/platforms/iseries/setup.c @@ -673,16 +673,18 @@ static void yield_shared_processor(void) static void iseries_shared_idle(void) { while (1) { - while (!need_resched() && !hvlpevent_is_pending()) { - local_irq_disable(); + while (!need_resched() && !need_resched_delayed() + && !hvlpevent_is_pending()) { + raw_local_irq_disable(); ppc64_runlatch_off(); /* Recheck with irqs off */ - if (!need_resched() && !hvlpevent_is_pending()) + if (!need_resched() && !need_resched_delayed() + && !hvlpevent_is_pending()) yield_shared_processor(); HMT_medium(); - local_irq_enable(); + raw_local_irq_enable(); } ppc64_runlatch_on(); Index: linux.prev/arch/powerpc/platforms/powermac/feature.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/feature.c +++ linux.prev/arch/powerpc/platforms/powermac/feature.c @@ -63,7 +63,7 @@ extern struct device_node *k2_skiplist[2 * We use a single global lock to protect accesses. 
Each driver has * to take care of its own locking */ -static DEFINE_SPINLOCK(feature_lock); +static DEFINE_RAW_SPINLOCK(feature_lock); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); Index: linux.prev/arch/powerpc/platforms/powermac/nvram.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/nvram.c +++ linux.prev/arch/powerpc/platforms/powermac/nvram.c @@ -81,7 +81,7 @@ static int is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; // XXX Turn that into a sem -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); extern int pmac_newworld; extern int system_running; Index: linux.prev/arch/powerpc/platforms/powermac/pic.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/pic.c +++ linux.prev/arch/powerpc/platforms/powermac/pic.c @@ -69,7 +69,7 @@ static int max_irqs; static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); #define GATWICK_IRQ_POOL_SIZE 10 static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE]; Index: linux.prev/arch/powerpc/platforms/powermac/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/smp.c +++ linux.prev/arch/powerpc/platforms/powermac/smp.c @@ -436,7 +436,7 @@ struct smp_ops_t psurge_smp_ops = { static struct device_node *pmac_tb_clock_chip_host; static u8 pmac_tb_pulsar_addr; static void (*pmac_tb_freeze)(int freeze); -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase; static void smp_core99_cypress_tb_freeze(int freeze) Index: linux.prev/arch/powerpc/platforms/pseries/setup.c =================================================================== --- 
linux.prev.orig/arch/powerpc/platforms/pseries/setup.c +++ linux.prev/arch/powerpc/platforms/pseries/setup.c @@ -336,7 +336,7 @@ static void __init pSeries_discover_pic static void pSeries_mach_cpu_die(void) { - local_irq_disable(); + raw_local_irq_disable(); idle_task_exit(); /* Some hardware requires clearing the CPPR, while other hardware does not * it is safe either way @@ -458,7 +458,7 @@ static inline void dedicated_idle_sleep( /* Only sleep if the other thread is not idle */ if (!(ppaca->lppaca.idle)) { - local_irq_disable(); + raw_local_irq_disable(); /* * We are about to sleep the thread and so wont be polling any @@ -474,10 +474,10 @@ static inline void dedicated_idle_sleep( * a prod occurs. Returning from the cede enables external * interrupts. */ - if (!need_resched()) + if (!need_resched() && !need_resched_delayed()) cede_processor(); else - local_irq_enable(); + raw_local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); } else { /* @@ -552,8 +552,9 @@ static void pseries_shared_idle(void) */ lpaca->lppaca.idle = 1; - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); + while (!need_resched() && !need_resched_delayed() && + !cpu_is_offline(cpu)) { + raw_local_irq_disable(); ppc64_runlatch_off(); /* @@ -569,7 +570,7 @@ static void pseries_shared_idle(void) - if (!need_resched()) + if (!need_resched() && !need_resched_delayed()) cede_processor(); else - local_irq_enable(); + raw_local_irq_enable(); HMT_medium(); } @@ -577,8 +578,10 @@ static void pseries_shared_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) Index: linux.prev/arch/powerpc/platforms/pseries/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/pseries/smp.c +++ linux.prev/arch/powerpc/platforms/pseries/smp.c @@ -345,7 +345,7 @@ static void __devinit smp_xics_setup_cpu } #endif 
/* CONFIG_XICS */ -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static void __devinit pSeries_give_timebase(void) Index: linux.prev/arch/powerpc/xmon/xmon.c =================================================================== --- linux.prev.orig/arch/powerpc/xmon/xmon.c +++ linux.prev/arch/powerpc/xmon/xmon.c @@ -522,10 +522,10 @@ irqreturn_t xmon_irq(int irq, void *d, struct pt_regs *regs) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); printf("Keyboard interrupt\n"); xmon(regs); - local_irq_restore(flags); + raw_local_irq_restore(flags); return IRQ_HANDLED; } Index: linux.prev/arch/ppc/8260_io/enet.c =================================================================== --- linux.prev.orig/arch/ppc/8260_io/enet.c +++ linux.prev/arch/ppc/8260_io/enet.c @@ -116,7 +116,7 @@ struct scc_enet_private { scc_t *sccp; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux.prev/arch/ppc/8260_io/fcc_enet.c =================================================================== --- linux.prev.orig/arch/ppc/8260_io/fcc_enet.c +++ linux.prev/arch/ppc/8260_io/fcc_enet.c @@ -377,7 +377,7 @@ struct fcc_enet_private { volatile fcc_enet_t *ep; struct net_device_stats stats; uint tx_free; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux.prev/arch/ppc/8xx_io/commproc.c =================================================================== --- linux.prev.orig/arch/ppc/8xx_io/commproc.c +++ linux.prev/arch/ppc/8xx_io/commproc.c @@ -356,7 +356,7 @@ cpm_setbrg(uint brg, uint rate) /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* * 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... 
Index: linux.prev/arch/ppc/8xx_io/enet.c =================================================================== --- linux.prev.orig/arch/ppc/8xx_io/enet.c +++ linux.prev/arch/ppc/8xx_io/enet.c @@ -144,7 +144,7 @@ struct scc_enet_private { unsigned char *rx_vaddr[RX_RING_SIZE]; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux.prev/arch/ppc/8xx_io/fec.c =================================================================== --- linux.prev.orig/arch/ppc/8xx_io/fec.c +++ linux.prev/arch/ppc/8xx_io/fec.c @@ -165,7 +165,7 @@ struct fec_enet_private { struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux.prev/arch/ppc/Kconfig =================================================================== --- linux.prev.orig/arch/ppc/Kconfig +++ linux.prev/arch/ppc/Kconfig @@ -15,13 +15,6 @@ config GENERIC_HARDIRQS bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -950,6 +943,18 @@ config HIGHMEM source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "mm/Kconfig" source "fs/Kconfig.binfmt" Index: linux.prev/arch/ppc/boot/Makefile =================================================================== --- linux.prev.orig/arch/ppc/boot/Makefile +++ linux.prev/arch/ppc/boot/Makefile @@ -11,6 +11,15 @@ # CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include + +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + HOSTCFLAGS += -Iarch/$(ARCH)/boot/include BOOT_TARGETS = zImage zImage.initrd znetboot 
znetboot.initrd Index: linux.prev/arch/ppc/boot/lib/Makefile =================================================================== --- linux.prev.orig/arch/ppc/boot/lib/Makefile +++ linux.prev/arch/ppc/boot/lib/Makefile @@ -5,19 +5,49 @@ CFLAGS_kbd.o := -Idrivers/char CFLAGS_vreset.o := -I$(srctree)/arch/ppc/boot/include -zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c - -lib-y += $(zlib:.c=.o) div64.o -lib-$(CONFIG_VGA_CONSOLE) += vreset.o kbd.o - +zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c +zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h +zliblinuxheader := zlib.h zconf.h zutil.h + +$(addprefix $(obj)/,$(zlib)): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) + +src-boot := div64.S +src-boot += $(zlib) +#src-boot := $(addprefix $(obj)/, $(src-boot)) +obj-boot := $(addsuffix .o, $(basename $(src-boot))) -# zlib files needs header from their original place -EXTRA_CFLAGS += -Ilib/zlib_inflate +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) $(CFLAGS) quiet_cmd_copy_zlib = COPY $@ - cmd_copy_zlib = cat $< > $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@.include.@@;s@.include.@@;s@.*spin.*lock.*@@;s@.*SPINLOCK.*@@;s@]\+\).*@\"\1\"@" $< > $@ + +quiet_cmd_copy_zlibheader = COPY $@ + cmd_copy_zlibheader = sed "s@]\+\).*@\"\1\"@" $< > $@ +# stddef.h for NULL +quiet_cmd_copy_zliblinuxheader = COPY $@ + cmd_copy_zliblinuxheader = sed "s@.include.@@;s@.include.@@;s@@@;s@]\+\).*@\"\1\"@" $< > $@ $(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/% $(call cmd,copy_zlib) -clean-files := $(zlib) +$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlibheader) + +$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/% + $(call cmd,copy_zliblinuxheader) + +clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) + +quiet_cmd_bootcc = BOOTCC $@ + cmd_bootcc = $(CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< 
+ +quiet_cmd_bootas = BOOTAS $@ + cmd_bootas = $(CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + +$(patsubst %.c,%.o, $(filter %.c, $(src-boot))): %.o: %.c + $(call if_changed_dep,bootcc) +$(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S + $(call if_changed_dep,bootas) + +lib-y += $(obj-boot) +lib-$(CONFIG_VGA_CONSOLE) += vreset.o kbd.o Index: linux.prev/arch/ppc/kernel/dma-mapping.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/dma-mapping.c +++ linux.prev/arch/ppc/kernel/dma-mapping.c @@ -71,7 +71,7 @@ int map_page(unsigned long va, phys_addr * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. @@ -403,7 +403,7 @@ static inline void __dma_sync_page_highm int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; int seg_nr = 0; - local_irq_save(flags); + raw_local_irq_save(flags); do { start = (unsigned long)kmap_atomic(page + seg_nr, @@ -422,7 +422,7 @@ static inline void __dma_sync_page_highm seg_offset = 0; } while (seg_nr < nr_segs); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_HIGHMEM */ Index: linux.prev/arch/ppc/kernel/entry.S =================================================================== --- linux.prev.orig/arch/ppc/kernel/entry.S +++ linux.prev/arch/ppc/kernel/entry.S @@ -239,7 +239,7 @@ ret_from_syscall: SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- syscall_exit_work syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -317,7 +317,7 @@ syscall_exit_work: rlwinm r12,r1,0,0,18 /* current_thread_info() */ lwz r9,TI_FLAGS(r12) 5: - andi. r0,r9,_TIF_NEED_RESCHED + andi. 
r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1f lwz r5,_MSR(r1) andi. r5,r5,MSR_PR @@ -658,7 +658,7 @@ user_exc_return: /* r10 contains MSR_KE /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -876,7 +876,7 @@ load_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -890,7 +890,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- do_resched andi. r0,r9,_TIF_SIGPENDING beq restore_user @@ -1000,3 +1000,85 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_OF */ + +#ifdef CONFIG_MCOUNT + +/* + * mcount() is not the same as _mcount(). The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. + */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function preamble, + * before the stack frame is created. 
An example of this preamble code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. */ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ Index: linux.prev/arch/ppc/kernel/idle.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/idle.c +++ linux.prev/arch/ppc/kernel/idle.c @@ -41,7 +41,7 @@ void default_idle(void) powersave = ppc_md.power_save; - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { if (powersave != NULL) powersave(); #ifdef CONFIG_SMP @@ -64,6 +64,10 @@ void cpu_idle(void) for (;;) { while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + stop_critical_timing(); + propagate_preempt_locks_value(); + if (ppc_md.idle != NULL) ppc_md.idle(); else @@ -72,9 +76,11 @@ void cpu_idle(void) if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } } Index: 
linux.prev/arch/ppc/kernel/ppc_ksyms.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/ppc_ksyms.c +++ linux.prev/arch/ppc/kernel/ppc_ksyms.c @@ -272,7 +272,6 @@ EXPORT_SYMBOL(screen_info); EXPORT_SYMBOL(__delay); EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(get_wchan); EXPORT_SYMBOL(console_drivers); @@ -280,9 +279,6 @@ EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(xmon); EXPORT_SYMBOL(xmon_printf); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) extern void (*debugger)(struct pt_regs *regs); Index: linux.prev/arch/ppc/kernel/process.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/process.c +++ linux.prev/arch/ppc/kernel/process.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -52,8 +54,8 @@ struct task_struct *last_task_used_math struct task_struct *last_task_used_altivec = NULL; struct task_struct *last_task_used_spe = NULL; -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); @@ -301,7 +303,7 @@ struct task_struct *__switch_to(struct t unsigned long s; struct task_struct *last; - local_irq_save(s); + raw_local_irq_save(s); #ifdef CHECK_STACK check_stack(prev); check_stack(new); @@ -364,7 +366,7 @@ struct task_struct *__switch_to(struct t new_thread = &new->thread; old_thread = ¤t->thread; last = _switch(old_thread, new_thread); - local_irq_restore(s); + raw_local_irq_restore(s); return last; } Index: 
linux.prev/arch/ppc/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/semaphore.c +++ linux.prev/arch/ppc/kernel/semaphore.c @@ -29,7 +29,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -48,7 +48,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -70,7 +70,7 @@ void __up(struct semaphore *sem) * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. */ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -100,7 +100,7 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -129,3 +129,8 @@ int __sched __down_interruptible(struct wake_up(&sem->wait); return retval; } + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} Index: linux.prev/arch/ppc/kernel/smp-tbsync.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/smp-tbsync.c +++ linux.prev/arch/ppc/kernel/smp-tbsync.c @@ -49,7 +49,7 @@ smp_generic_take_timebase( void ) { int cmd, tbl, tbu; - local_irq_disable(); + raw_local_irq_disable(); while( !running ) ; rmb(); @@ -78,7 +78,7 @@ smp_generic_take_timebase( void ) } enter_contest( tbsync->mark, -1 ); } - local_irq_enable(); + raw_local_irq_enable(); } static int __devinit @@ -88,7 +88,7 @@ 
start_contest( int cmd, int offset, int tbsync->cmd = cmd; - local_irq_disable(); + raw_local_irq_disable(); for( i=-3; i<num; ) { tbl = get_tbl() + 400; tbsync->tbu = tbu = get_tbu(); @@ -114,7 +114,7 @@ start_contest( int cmd, int offset, int if( i++ > 0 ) score += tbsync->race_result; } - local_irq_enable(); + raw_local_irq_enable(); return score; } Index: linux.prev/arch/ppc/kernel/smp.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/smp.c +++ linux.prev/arch/ppc/kernel/smp.c @@ -138,6 +138,16 @@ void smp_send_reschedule(int cpu) smp_message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE); +} + #ifdef CONFIG_XMON void smp_send_xmon_break(int cpu) { @@ -147,7 +157,7 @@ void smp_send_xmon_break(int cpu) static void stop_this_cpu(void *dummy) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -162,7 +172,7 @@ void smp_send_stop(void) * static memory requirements. It also looks cleaner. * Stolen from the i386 version. 
*/ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); @@ -197,7 +207,7 @@ int smp_call_function(void (*func) (void if (num_online_cpus() <= 1) return 0; /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); return __smp_call_function(func, info, wait, MSG_ALL_BUT_SELF); } @@ -358,7 +368,7 @@ int __devinit start_secondary(void *unus cpu_set(cpu, cpu_online_map); spin_unlock(&call_lock); - local_irq_enable(); + raw_local_irq_enable(); cpu_idle(); return 0; Index: linux.prev/arch/ppc/kernel/temp.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/temp.c +++ linux.prev/arch/ppc/kernel/temp.c @@ -142,7 +142,7 @@ static void tau_timeout(void * info) int shrink; /* disabling interrupts *should* be okay */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); #ifndef CONFIG_TAU_INT @@ -185,7 +185,7 @@ static void tau_timeout(void * info) */ mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tau_timeout_smp(unsigned long unused) Index: linux.prev/arch/ppc/kernel/time.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/time.c +++ linux.prev/arch/ppc/kernel/time.c @@ -66,6 +66,9 @@ #include +unsigned long cpu_khz; /* Timebase frequency in kHz, set by the platform calibrate_decr() code */ +EXPORT_SYMBOL(cpu_khz); + unsigned long disarm_decr[NR_CPUS]; extern struct timezone sys_tz; @@ -86,7 +89,7 @@ extern unsigned long wall_jiffies; /* used for timezone offset */ static long timezone_offset; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); @@ -104,7 +107,7 @@ static inline int tb_delta(unsigned *jif } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { 
unsigned long pc = instruction_pointer(regs); Index: linux.prev/arch/ppc/kernel/traps.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/traps.c +++ linux.prev/arch/ppc/kernel/traps.c @@ -77,7 +77,7 @@ void (*debugger_fault_handler)(struct pt * Trap & Exception support */ -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); int die(const char * str, struct pt_regs * fp, long err) { @@ -118,6 +118,10 @@ void _exception(int signr, struct pt_reg debugger(regs); die("Exception in kernel mode", regs, signr); } +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif info.si_signo = signr; info.si_errno = 0; info.si_code = code; Index: linux.prev/arch/ppc/lib/locks.c =================================================================== --- linux.prev.orig/arch/ppc/lib/locks.c +++ linux.prev/arch/ppc/lib/locks.c @@ -43,7 +43,7 @@ static inline unsigned long __spin_trylo return ret; } -void _raw_spin_lock(spinlock_t *lock) +void __raw_spin_lock(raw_spinlock_t *lock) { int cpu = smp_processor_id(); unsigned int stuck = INIT_STUCK; @@ -63,9 +63,9 @@ void _raw_spin_lock(spinlock_t *lock) lock->owner_pc = (unsigned long)__builtin_return_address(0); lock->owner_cpu = cpu; } -EXPORT_SYMBOL(_raw_spin_lock); +EXPORT_SYMBOL(__raw_spin_lock); -int _raw_spin_trylock(spinlock_t *lock) +int __raw_spin_trylock(raw_spinlock_t *lock) { if (__spin_trylock(&lock->lock)) return 0; @@ -73,9 +73,9 @@ int _raw_spin_trylock(spinlock_t *lock) lock->owner_pc = (unsigned long)__builtin_return_address(0); return 1; } -EXPORT_SYMBOL(_raw_spin_trylock); +EXPORT_SYMBOL(__raw_spin_trylock); -void _raw_spin_unlock(spinlock_t *lp) +void __raw_spin_unlock(raw_spinlock_t *lp) { if ( !lp->lock ) printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", @@ -89,13 +89,13 @@ void _raw_spin_unlock(spinlock_t *lp) wmb(); lp->lock = 0; } -EXPORT_SYMBOL(_raw_spin_unlock); +EXPORT_SYMBOL(__raw_spin_unlock); /* * For 
rwlocks, zero is unlocked, -1 is write-locked, * positive is read-locked. */ -static __inline__ int __read_trylock(rwlock_t *rw) +static __inline__ int __read_trylock(raw_rwlock_t *rw) { signed int tmp; @@ -115,13 +115,13 @@ static __inline__ int __read_trylock(rwl return tmp; } -int _raw_read_trylock(rwlock_t *rw) +int __raw_read_trylock(raw_rwlock_t *rw) { return __read_trylock(rw) > 0; } -EXPORT_SYMBOL(_raw_read_trylock); +EXPORT_SYMBOL(__raw_read_trylock); -void _raw_read_lock(rwlock_t *rw) +void __raw_read_lock(raw_rwlock_t *rw) { unsigned int stuck; @@ -136,9 +136,9 @@ void _raw_read_lock(rwlock_t *rw) } } } -EXPORT_SYMBOL(_raw_read_lock); +EXPORT_SYMBOL(__raw_read_lock); -void _raw_read_unlock(rwlock_t *rw) +void __raw_read_unlock(raw_rwlock_t *rw) { if ( rw->lock == 0 ) printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n", @@ -147,9 +147,9 @@ void _raw_read_unlock(rwlock_t *rw) wmb(); atomic_dec((atomic_t *) &(rw)->lock); } -EXPORT_SYMBOL(_raw_read_unlock); +EXPORT_SYMBOL(__raw_read_unlock); -void _raw_write_lock(rwlock_t *rw) +void __raw_write_lock(raw_rwlock_t *rw) { unsigned int stuck; @@ -165,18 +165,18 @@ void _raw_write_lock(rwlock_t *rw) } wmb(); } -EXPORT_SYMBOL(_raw_write_lock); +EXPORT_SYMBOL(__raw_write_lock); -int _raw_write_trylock(rwlock_t *rw) +int __raw_write_trylock(raw_rwlock_t *rw) { if (cmpxchg(&rw->lock, 0, -1) != 0) return 0; wmb(); return 1; } -EXPORT_SYMBOL(_raw_write_trylock); +EXPORT_SYMBOL(__raw_write_trylock); -void _raw_write_unlock(rwlock_t *rw) +void __raw_write_unlock(raw_rwlock_t *rw) { if (rw->lock >= 0) printk("_write_lock(): %s/%d (nip %08lX) lock %d\n", @@ -185,6 +185,6 @@ void _raw_write_unlock(rwlock_t *rw) wmb(); rw->lock = 0; } -EXPORT_SYMBOL(_raw_write_unlock); +EXPORT_SYMBOL(__raw_write_unlock); #endif Index: linux.prev/arch/ppc/mm/fault.c =================================================================== --- linux.prev.orig/arch/ppc/mm/fault.c +++ linux.prev/arch/ppc/mm/fault.c @@ -92,7 +92,7 @@ static int 
store_updates_sp(struct pt_re * the error_code parameter is ESR for a data fault, 0 for an instruction * fault. */ -int do_page_fault(struct pt_regs *regs, unsigned long address, +int notrace do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; Index: linux.prev/arch/ppc/mm/init.c =================================================================== --- linux.prev.orig/arch/ppc/mm/init.c +++ linux.prev/arch/ppc/mm/init.c @@ -56,7 +56,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; Index: linux.prev/arch/ppc/platforms/4xx/xilinx_ml300.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/4xx/xilinx_ml300.c +++ linux.prev/arch/ppc/platforms/4xx/xilinx_ml300.c @@ -62,7 +62,7 @@ static volatile unsigned *powerdown_base static void xilinx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); out_be32(powerdown_base, XPAR_POWER_0_POWERDOWN_VALUE); while (1) ; } Index: linux.prev/arch/ppc/platforms/apus_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/apus_setup.c +++ linux.prev/arch/ppc/platforms/apus_setup.c @@ -282,6 +282,7 @@ void apus_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; __bus_speed = bus_speed; __speed_test_failed = speed_test_failed; @@ -480,7 +481,7 @@ void cache_clear(__u32 addr, int length) void apus_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); APUS_WRITE(APUS_REG_LOCK, REGLOCK_BLACKMAGICK1|REGLOCK_BLACKMAGICK2); @@ -598,7 +599,7 @@ int __debug_serinit( void ) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* turn off Rx and Tx interrupts 
*/ custom.intena = IF_RBF | IF_TBE; @@ -606,7 +607,7 @@ int __debug_serinit( void ) /* clear any pending interrupt */ custom.intreq = IF_RBF | IF_TBE; - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * set the appropriate directions for the modem control flags, Index: linux.prev/arch/ppc/platforms/chestnut.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/chestnut.c +++ linux.prev/arch/ppc/platforms/chestnut.c @@ -455,7 +455,7 @@ chestnut_restart(char *cmd) { volatile ulong i = 10000000; - local_irq_disable(); + raw_local_irq_disable(); /* * Set CPLD Reg 3 bit 0 to 1 to allow MPP signals on reset to work @@ -474,7 +474,7 @@ chestnut_restart(char *cmd) static void chestnut_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for (;;); /* NOTREACHED */ } Index: linux.prev/arch/ppc/platforms/chrp_smp.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/chrp_smp.c +++ linux.prev/arch/ppc/platforms/chrp_smp.c @@ -58,7 +58,7 @@ smp_chrp_setup_cpu(int cpu_nr) do_openpic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit Index: linux.prev/arch/ppc/platforms/chrp_time.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/chrp_time.c +++ linux.prev/arch/ppc/platforms/chrp_time.c @@ -28,7 +28,7 @@ #include #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; static int nvram_as1 = NVRAM_AS1; static int nvram_as0 = NVRAM_AS0; @@ -188,4 +188,5 @@ void __init chrp_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux.prev/arch/ppc/platforms/cpci690.c =================================================================== --- 
linux.prev.orig/arch/ppc/platforms/cpci690.c +++ linux.prev/arch/ppc/platforms/cpci690.c @@ -322,7 +322,7 @@ cpci690_reset_board(void) { u32 i = 10000; - local_irq_disable(); + raw_local_irq_disable(); out_8((cpci690_br_base + CPCI690_BR_SW_RESET), 0x11); while (i != 0) i++; Index: linux.prev/arch/ppc/platforms/ev64260.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/ev64260.c +++ linux.prev/arch/ppc/platforms/ev64260.c @@ -446,7 +446,7 @@ ev64260_platform_notify(struct device *d static void ev64260_reset_board(void *addr) { - local_irq_disable(); + raw_local_irq_disable(); /* disable and invalidate the L2 cache */ _set_L2CR(0); @@ -514,7 +514,7 @@ ev64260_restart(char *cmd) static void ev64260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); /* NOTREACHED */ } @@ -553,6 +553,7 @@ ev64260_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux.prev/arch/ppc/platforms/gemini_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/gemini_setup.c +++ linux.prev/arch/ppc/platforms/gemini_setup.c @@ -303,7 +303,7 @@ void __init gemini_init_l2(void) void gemini_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* make a clean restart, not via the MPIC */ _gemini_reboot(); for(;;); @@ -462,6 +462,7 @@ void __init gemini_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } unsigned long __init gemini_find_end_of_memory(void) Index: linux.prev/arch/ppc/platforms/hdpu.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/hdpu.c +++ linux.prev/arch/ppc/platforms/hdpu.c @@ -474,7 +474,7 @@ static void hdpu_reset_board(void) 
hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_RESET); - local_irq_disable(); + raw_local_irq_disable(); /* Clear all the LEDs */ mv64x60_write(&bh, MV64x60_GPP_VALUE_CLR, ((1 << 4) | @@ -516,7 +516,7 @@ static void hdpu_restart(char *cmd) static void hdpu_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_HALT); Index: linux.prev/arch/ppc/platforms/lopec.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/lopec.c +++ linux.prev/arch/ppc/platforms/lopec.c @@ -153,7 +153,7 @@ lopec_restart(char *cmd) reg |= 0x80; *((unsigned char *) LOPEC_SYSSTAT1) = reg; - local_irq_disable(); + raw_local_irq_disable(); while(1); #undef LOPEC_SYSSTAT1 } @@ -161,7 +161,7 @@ lopec_restart(char *cmd) static void lopec_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while(1); } Index: linux.prev/arch/ppc/platforms/mvme5100.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/mvme5100.c +++ linux.prev/arch/ppc/platforms/mvme5100.c @@ -262,7 +262,7 @@ mvme5100_map_io(void) static void mvme5100_reset_board(void) { - local_irq_disable(); + raw_local_irq_disable(); /* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -286,7 +286,7 @@ mvme5100_restart(char *cmd) static void mvme5100_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux.prev/arch/ppc/platforms/pal4_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pal4_setup.c +++ linux.prev/arch/ppc/platforms/pal4_setup.c @@ -82,7 +82,7 @@ pal4_show_cpuinfo(struct seq_file *m) static void pal4_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); __asm__ __volatile__("lis 3,0xfff0\n \ ori 3,3,0x100\n \ mtspr 26,3\n \ @@ -96,7 +96,7 @@ pal4_restart(char *cmd) static void pal4_power_off(void) { 
- local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/platforms/pmac_cpufreq.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_cpufreq.c +++ linux.prev/arch/ppc/platforms/pmac_cpufreq.c @@ -285,7 +285,7 @@ static int pmu_set_cpu_speed(int low_spe asm volatile("mtdec %0" : : "r" (0x7fffffff)); /* We can now disable MSR_EE */ - local_irq_save(flags); + raw_local_irq_save(flags); /* Giveup the FPU & vec */ enable_kernel_fp(); @@ -341,7 +341,7 @@ static int pmu_set_cpu_speed(int low_spe openpic_set_priority(pic_prio); /* Let interrupts flow again ... */ - local_irq_restore(flags); + raw_local_irq_restore(flags); #ifdef DEBUG_FREQ debug_calc_bogomips(); Index: linux.prev/arch/ppc/platforms/pmac_feature.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_feature.c +++ linux.prev/arch/ppc/platforms/pmac_feature.c @@ -63,7 +63,7 @@ extern struct device_node *k2_skiplist[2 * We use a single global lock to protect accesses. 
Each driver has * to take care of its own locking */ -static DEFINE_SPINLOCK(feature_lock); +static DEFINE_RAW_SPINLOCK(feature_lock); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); Index: linux.prev/arch/ppc/platforms/pmac_nvram.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_nvram.c +++ linux.prev/arch/ppc/platforms/pmac_nvram.c @@ -80,7 +80,7 @@ static volatile unsigned char *nvram_dat static int nvram_mult, is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); extern int pmac_newworld; extern int system_running; Index: linux.prev/arch/ppc/platforms/pmac_pic.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_pic.c +++ linux.prev/arch/ppc/platforms/pmac_pic.c @@ -69,7 +69,7 @@ static int max_irqs; static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); #define GATWICK_IRQ_POOL_SIZE 10 Index: linux.prev/arch/ppc/platforms/pmac_smp.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_smp.c +++ linux.prev/arch/ppc/platforms/pmac_smp.c @@ -499,8 +499,8 @@ static void __devinit smp_core99_kick_cp return; if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); - local_irq_save(flags); - local_irq_disable(); + raw_local_irq_save(flags); + raw_local_irq_disable(); /* Save reset vector */ save_vector = *vector; @@ -528,7 +528,7 @@ static void __devinit smp_core99_kick_cp *vector = save_vector; flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); } @@ -570,7 +570,7 @@ void 
smp_core99_take_timebase(void) mb(); /* set our stuff the same as the primary */ - local_irq_save(flags); + raw_local_irq_save(flags); set_dec(1); set_tb(pri_tb_hi, pri_tb_lo); last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp; @@ -579,7 +579,7 @@ void smp_core99_take_timebase(void) /* tell the primary we're done */ sec_tb_reset = 0; mb(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* not __init, called in sleep/wakeup code */ @@ -599,7 +599,7 @@ void smp_core99_give_timebase(void) /* freeze the timebase and read it */ /* disable interrupts so the timebase is disabled for the shortest possible time */ - local_irq_save(flags); + raw_local_irq_save(flags); pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); mb(); @@ -623,7 +623,7 @@ void smp_core99_give_timebase(void) /* Now, restart the timebase by leaving the GPIO to an open collector */ pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/ppc/platforms/pmac_time.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_time.c +++ linux.prev/arch/ppc/platforms/pmac_time.c @@ -197,6 +197,7 @@ via_calibrate_decr(void) tb_ticks_per_jiffy = (dstart - dend) / ((6 * HZ)/100); tb_to_us = mulhwu_scale_factor(dstart - dend, 60000); + cpu_khz = (dstart - dend) / 60; printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n", tb_ticks_per_jiffy, dstart - dend); @@ -288,4 +289,5 @@ pmac_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux.prev/arch/ppc/platforms/powerpmc250.c =================================================================== --- 
linux.prev.orig/arch/ppc/platforms/powerpmc250.c +++ linux.prev/arch/ppc/platforms/powerpmc250.c @@ -166,12 +166,13 @@ powerpmc250_calibrate_decr(void) tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void powerpmc250_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Hard reset */ writeb(0x11, 0xfe000332); while(1); @@ -180,7 +181,7 @@ powerpmc250_restart(char *cmd) static void powerpmc250_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux.prev/arch/ppc/platforms/pplus.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pplus.c +++ linux.prev/arch/ppc/platforms/pplus.c @@ -607,7 +607,7 @@ static void pplus_restart(char *cmd) { unsigned long i = 10000; - local_irq_disable(); + raw_local_irq_disable(); /* set VIA IDE controller into native mode */ pplus_set_VIA_IDE_native(); Index: linux.prev/arch/ppc/platforms/prep_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/prep_setup.c +++ linux.prev/arch/ppc/platforms/prep_setup.c @@ -464,7 +464,7 @@ static void prep_restart(char *cmd) { #define PREP_SP92 0x92 /* Special Port 92 */ - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -482,7 +482,7 @@ prep_restart(char *cmd) static void prep_halt(void) { - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -550,7 +550,7 @@ prep_sig750_poweroff(void) { /* tweak the power manager found in most IBM PRePs (except Thinkpads) */ - local_irq_disable(); + raw_local_irq_disable(); /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -944,6 +944,7 @@ 
prep_calibrate_decr(void) (freq/divisor)/1000000, (freq/divisor)%1000000); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; tb_ticks_per_jiffy = freq / HZ / divisor; } } Index: linux.prev/arch/ppc/platforms/prpmc750.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/prpmc750.c +++ linux.prev/arch/ppc/platforms/prpmc750.c @@ -271,18 +271,19 @@ static void __init prpmc750_calibrate_de tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void prpmc750_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); writeb(PRPMC750_MODRST_MASK, PRPMC750_MODRST_REG); while (1) ; } static void prpmc750_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux.prev/arch/ppc/platforms/prpmc800.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/prpmc800.c +++ linux.prev/arch/ppc/platforms/prpmc800.c @@ -330,6 +330,7 @@ static void __init prpmc800_calibrate_de tb_ticks_per_second = 100000000 / 4; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; return; } @@ -370,13 +371,14 @@ static void __init prpmc800_calibrate_de tb_ticks_per_second = (tbl_end - tbl_start) * 2; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; } static void prpmc800_restart(char *cmd) { ulong temp; - local_irq_disable(); + raw_local_irq_disable(); temp = in_be32((uint *) HARRIER_MISC_CSR_REG); temp |= HARRIER_RSTOUT; out_be32((uint *) HARRIER_MISC_CSR_REG, temp); @@ -385,7 +387,7 @@ static void prpmc800_restart(char *cmd) static void prpmc800_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: 
linux.prev/arch/ppc/platforms/radstone_ppc7d.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/radstone_ppc7d.c +++ linux.prev/arch/ppc/platforms/radstone_ppc7d.c @@ -176,7 +176,7 @@ static void ppc7d_power_off(void) { u32 data; - local_irq_disable(); + raw_local_irq_disable(); /* Ensure that internal MV643XX watchdog is disabled. * The Disco watchdog uses MPP17 on this hardware. Index: linux.prev/arch/ppc/platforms/sandpoint.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/sandpoint.c +++ linux.prev/arch/ppc/platforms/sandpoint.c @@ -527,7 +527,7 @@ sandpoint_map_io(void) static void sandpoint_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -541,7 +541,7 @@ sandpoint_restart(char *cmd) static void sandpoint_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); /* No way to shut power off with software */ /* NOTREACHED */ } Index: linux.prev/arch/ppc/platforms/sbc82xx.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/sbc82xx.c +++ linux.prev/arch/ppc/platforms/sbc82xx.c @@ -68,7 +68,7 @@ static void sbc82xx_time_init(void) static volatile char *sbc82xx_i8259_map; static char sbc82xx_i8259_mask = 0xff; -static DEFINE_SPINLOCK(sbc82xx_i8259_lock); +static DEFINE_RAW_SPINLOCK(sbc82xx_i8259_lock); static void sbc82xx_i8259_mask_and_ack_irq(unsigned int irq_nr) { Index: linux.prev/arch/ppc/platforms/spruce.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/spruce.c +++ linux.prev/arch/ppc/platforms/spruce.c @@ -150,6 +150,7 @@ spruce_calibrate_decr(void) freq = SPRUCE_BUS_SPEED; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } 
static int @@ -236,7 +237,7 @@ spruce_setup_arch(void) static void spruce_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* SRR0 has system reset vector, SRR1 has default MSR value */ /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ Index: linux.prev/arch/ppc/syslib/cpm2_common.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/cpm2_common.c +++ linux.prev/arch/ppc/syslib/cpm2_common.c @@ -114,7 +114,7 @@ cpm2_fastbrg(uint brg, uint rate, int di /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... */ static rh_block_t cpm_boot_dpmem_rh_block[16]; Index: linux.prev/arch/ppc/syslib/ibm440gx_common.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ibm440gx_common.c +++ linux.prev/arch/ppc/syslib/ibm440gx_common.c @@ -157,7 +157,7 @@ void __init ibm440gx_l2c_enable(void){ return; } - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable SRAM */ @@ -201,7 +201,7 @@ void __init ibm440gx_l2c_enable(void){ mtdcr(DCRN_L2C0_CFG, r); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Disable L2 cache */ @@ -209,7 +209,7 @@ void __init ibm440gx_l2c_disable(void){ u32 r; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable L2C mode */ @@ -228,7 +228,7 @@ void __init ibm440gx_l2c_disable(void){ SRAM_SBCR_BAS3 | SRAM_SBCR_BS_64KB | SRAM_SBCR_BU_RW); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init ibm440gx_l2c_setup(struct ibm44x_clocks* p) Index: linux.prev/arch/ppc/syslib/ibm44x_common.c 
=================================================================== --- linux.prev.orig/arch/ppc/syslib/ibm44x_common.c +++ linux.prev/arch/ppc/syslib/ibm44x_common.c @@ -66,6 +66,7 @@ void __init ibm44x_calibrate_decr(unsign { tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -82,19 +83,19 @@ extern void abort(void); static void ibm44x_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } static void ibm44x_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } static void ibm44x_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/syslib/m8260_pci_erratum9.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/m8260_pci_erratum9.c +++ linux.prev/arch/ppc/syslib/m8260_pci_erratum9.c @@ -132,7 +132,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (sinc) @@ -161,7 +161,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } @@ -184,7 +184,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (dinc) @@ -213,7 +213,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } Index: linux.prev/arch/ppc/syslib/m8260_setup.c =================================================================== --- 
linux.prev.orig/arch/ppc/syslib/m8260_setup.c +++ linux.prev/arch/ppc/syslib/m8260_setup.c @@ -82,6 +82,7 @@ m8260_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } /* The 8260 has an internal 1-second timer update register that @@ -132,7 +133,7 @@ m8260_restart(char *cmd) static void m8260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux.prev/arch/ppc/syslib/m8xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/m8xx_setup.c +++ linux.prev/arch/ppc/syslib/m8xx_setup.c @@ -160,6 +160,7 @@ void __init m8xx_calibrate_decr(void) printk("Decrementer Frequency = %d/%d\n", freq, divisor); tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Perform some more timer/timebase initialization. This used * to be done elsewhere, but other changes caused it to get @@ -231,7 +232,7 @@ m8xx_restart(char *cmd) { __volatile__ unsigned char dummy; - local_irq_disable(); + raw_local_irq_disable(); out_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr, in_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr) | 0x00000080); /* Clear the ME bit in MSR to cause checkstop on machine check Index: linux.prev/arch/ppc/syslib/mpc52xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/mpc52xx_setup.c +++ linux.prev/arch/ppc/syslib/mpc52xx_setup.c @@ -40,7 +40,7 @@ mpc52xx_restart(char *cmd) { struct mpc52xx_gpt __iomem *gpt0 = MPC52xx_VA(MPC52xx_GPTx_OFFSET(0)); - local_irq_disable(); + raw_local_irq_disable(); /* Turn on the watchdog and wait for it to expire. 
It effectively does a reset */ @@ -53,7 +53,7 @@ mpc52xx_restart(char *cmd) void mpc52xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } @@ -214,6 +214,7 @@ mpc52xx_calibrate_decr(void) tb_ticks_per_jiffy = xlbfreq / HZ / divisor; tb_to_us = mulhwu_scale_factor(xlbfreq / divisor, 1000000); + cpu_khz = (xlbfreq / divisor) / 1000; } int mpc52xx_match_psc_function(int psc_idx, const char *func) Index: linux.prev/arch/ppc/syslib/ocp.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ocp.c +++ linux.prev/arch/ppc/syslib/ocp.c @@ -45,11 +45,11 @@ #include #include #include +#include #include #include #include -#include #include //#define DBG(x) printk x Index: linux.prev/arch/ppc/syslib/open_pic.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/open_pic.c +++ linux.prev/arch/ppc/syslib/open_pic.c @@ -529,7 +529,7 @@ void openpic_reset_processor_phys(u_int } #if defined(CONFIG_SMP) || defined(CONFIG_PM) -static DEFINE_SPINLOCK(openpic_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic_setup_lock); #endif #ifdef CONFIG_SMP Index: linux.prev/arch/ppc/syslib/open_pic2.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/open_pic2.c +++ linux.prev/arch/ppc/syslib/open_pic2.c @@ -383,7 +383,7 @@ static void openpic2_set_spurious(u_int vec); } -static DEFINE_SPINLOCK(openpic2_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic2_setup_lock); /* * Initialize a timer interrupt (and disable it) Index: linux.prev/arch/ppc/syslib/ppc4xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ppc4xx_setup.c +++ linux.prev/arch/ppc/syslib/ppc4xx_setup.c @@ -142,7 +142,7 @@ static void ppc4xx_power_off(void) { printk("System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -150,7 +150,7 @@ static void ppc4xx_halt(void) 
{ printk("System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -173,6 +173,7 @@ ppc4xx_calibrate_decr(void) freq = bip->bi_tbfreq; tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero. ** At 200 Mhz, time base will rollover in ~2925 years. Index: linux.prev/arch/ppc/syslib/ppc83xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ppc83xx_setup.c +++ linux.prev/arch/ppc/syslib/ppc83xx_setup.c @@ -138,7 +138,7 @@ mpc83xx_restart(char *cmd) reg = ioremap(BCSR_PHYS_ADDR, BCSR_SIZE); - local_irq_disable(); + raw_local_irq_disable(); /* * Unlock the BCSR bits so a PRST will update the contents. @@ -167,14 +167,14 @@ mpc83xx_restart(char *cmd) void mpc83xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc83xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/syslib/ppc85xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ppc85xx_setup.c +++ linux.prev/arch/ppc/syslib/ppc85xx_setup.c @@ -60,6 +60,7 @@ mpc85xx_calibrate_decr(void) divisor = 8; tb_ticks_per_jiffy = freq / divisor / HZ; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -115,21 +116,21 @@ mpc85xx_early_serial_map(void) void mpc85xx_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } void mpc85xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc85xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/syslib/prom.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/prom.c +++ linux.prev/arch/ppc/syslib/prom.c @@ -1396,7 
+1396,7 @@ print_properties(struct device_node *np) } #endif -static DEFINE_SPINLOCK(rtas_lock); +static DEFINE_RAW_SPINLOCK(rtas_lock); /* this can be called after setup -- Cort */ int Index: linux.prev/arch/ppc/syslib/todc_time.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/todc_time.c +++ linux.prev/arch/ppc/syslib/todc_time.c @@ -508,6 +508,7 @@ todc_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux.prev/arch/ppc/xmon/xmon.c =================================================================== --- linux.prev.orig/arch/ppc/xmon/xmon.c +++ linux.prev/arch/ppc/xmon/xmon.c @@ -297,10 +297,10 @@ irqreturn_t xmon_irq(int irq, void *d, struct pt_regs *regs) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); printf("Keyboard interrupt\n"); xmon(regs); - local_irq_restore(flags); + raw_local_irq_restore(flags); return IRQ_HANDLED; } Index: linux.prev/arch/sh64/kernel/time.c =================================================================== --- linux.prev.orig/arch/sh64/kernel/time.c +++ linux.prev/arch/sh64/kernel/time.c @@ -417,7 +417,7 @@ static __init unsigned int get_cpu_hz(vo /* ** Regardless the toolchain, force the compiler to use the ** arbitrary register r3 as a clock tick counter. 
- ** NOTE: r3 must be in accordance with rtc_interrupt() + ** NOTE: r3 must be in accordance with sh64_rtc_interrupt() */ register unsigned long long __rtc_irq_flag __asm__ ("r3"); @@ -482,7 +482,8 @@ static __init unsigned int get_cpu_hz(vo #endif } -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { ctrl_outb(0, RCR1); /* Disable Carry Interrupts */ regs->regs[3] = 1; /* Using r3 */ @@ -491,7 +492,7 @@ static irqreturn_t rtc_interrupt(int irq } static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; -static struct irqaction irq1 = { rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; +static struct irqaction irq1 = { sh64_rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; void __init time_init(void) { Index: linux.prev/arch/x86_64/Kconfig =================================================================== --- linux.prev.orig/arch/x86_64/Kconfig +++ linux.prev/arch/x86_64/Kconfig @@ -24,6 +24,14 @@ config X86 bool default y +config GENERIC_TIME + bool + default y + +config GENERIC_TIME_VSYSCALL + bool + default y + config SEMAPHORE_SLEEPERS bool default y @@ -38,13 +46,6 @@ config ISA config SBUS bool -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_CALIBRATE_DELAY bool default y @@ -199,6 +200,8 @@ config MTRR See for more information. +source "kernel/time/Kconfig" + config SMP bool "Symmetric multi-processing support" ---help--- @@ -237,6 +240,14 @@ config NUMA If the system is EM64T, you should say N unless your system is EM64T NUMA. +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + bool + config K8_NUMA bool "Old style AMD Opteron NUMA detection" depends on NUMA @@ -327,21 +338,6 @@ config HPET_TIMER as it is off-chip. 
You can find the HPET spec at . -config X86_PM_TIMER - bool "PM timer" - depends on ACPI - default y - help - Support the ACPI PM timer for time keeping. This is slow, - but is useful on some chipsets without HPET on systems with more - than one CPU. On a single processor or single socket multi core - system it is normally not required. - When the PM timer is active 64bit vsyscalls are disabled - and should not be enabled (/proc/sys/kernel/vsyscall64 should - not be changed). - The kernel selects the PM timer only as a last resort, so it is - useful to enable just in case. - config HPET_EMULATE_RTC bool "Provide RTC interrupt" depends on HPET_TIMER && RTC=y Index: linux.prev/arch/x86_64/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/x86_64/boot/compressed/misc.c +++ linux.prev/arch/x86_64/boot/compressed/misc.c @@ -114,6 +114,7 @@ static char *vidmem = (char *)0xb8000; static int vidport; static int lines, cols; +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" static void *malloc(int size) Index: linux.prev/arch/x86_64/ia32/sys_ia32.c =================================================================== --- linux.prev.orig/arch/x86_64/ia32/sys_ia32.c +++ linux.prev/arch/x86_64/ia32/sys_ia32.c @@ -456,6 +456,10 @@ sys32_settimeofday(struct compat_timeval struct timespec kts; struct timezone ktz; + int ret = timeofday_API_hacks(tv, tz); + if (ret != 1) + return ret; + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; Index: linux.prev/arch/x86_64/kernel/Makefile =================================================================== --- linux.prev.orig/arch/x86_64/kernel/Makefile +++ linux.prev/arch/x86_64/kernel/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_prin obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o +obj-$(CONFIG_SYSFS) += 
switch2poll.o obj-$(CONFIG_MODULES) += module.o Index: linux.prev/arch/x86_64/kernel/apic.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/apic.c +++ linux.prev/arch/x86_64/kernel/apic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -38,13 +39,15 @@ int apic_verbosity; int disable_apic_timer __initdata; +/* + * cpu_mask that denotes the CPUs that need timer interrupts coming in as + * IPIs in place of local APIC timers + */ +static cpumask_t timer_interrupt_broadcast_ipi_mask; + /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static void apic_pm_activate(void); void enable_NMI_through_LVT0 (void * dummy) @@ -485,10 +488,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_save_flags(flags); - local_irq_disable(); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -503,7 +505,7 @@ static int lapic_resume(struct sys_devic /* XXX: Pavel needs this for S3 resume, but can't explain why */ set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - local_irq_save(flags); + raw_local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; @@ -526,7 +528,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -660,9 +662,14 @@ void __init init_apic_mappings(void) static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, 
tmp_value, ver; + int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + + if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) + lvtt_value |= APIC_LVT_MASKED; + apic_write_around(APIC_LVTT, lvtt_value); /* @@ -680,7 +687,7 @@ static void setup_APIC_timer(unsigned in { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* For some reasons this doesn't work on Simics, so fake it for now */ if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { @@ -710,7 +717,7 @@ static void setup_APIC_timer(unsigned in __setup_APIC_LVTT(clocks); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -767,7 +774,7 @@ void __init setup_boot_APIC_clock (void) printk(KERN_INFO "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + raw_local_irq_disable(); calibration_result = calibrate_APIC_clock(); /* @@ -775,17 +782,17 @@ void __init setup_boot_APIC_clock (void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuinit setup_secondary_APIC_clock(void) { - local_irq_disable(); /* FIXME: Do we need this? --RR */ + raw_local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } -void __cpuinit disable_APIC_timer(void) +void disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; @@ -797,7 +804,10 @@ void __cpuinit disable_APIC_timer(void) void enable_APIC_timer(void) { - if (using_apic_timer) { + int cpu = smp_processor_id(); + + if (using_apic_timer && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { unsigned long v; v = apic_read(APIC_LVTT); @@ -805,32 +815,45 @@ void enable_APIC_timer(void) } } -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. 
- */ -int setup_profiling_timer(unsigned int multiplier) +void switch_APIC_timer_to_ipi(void *cpumask) { - int i; + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; + if (cpu_isset(cpu, mask) && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + disable_APIC_timer(); + cpu_set(cpu, timer_interrupt_broadcast_ipi_mask); + } +} +EXPORT_SYMBOL(switch_APIC_timer_to_ipi); - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; +void smp_send_timer_broadcast_ipi(void) +{ + cpumask_t mask; - return 0; + cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask); + if (!cpus_empty(mask)) { + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + } +} + +void switch_ipi_to_APIC_timer(void *cpumask) +{ + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); + + if (cpu_isset(cpu, mask) && + cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask); + enable_APIC_timer(); + } +} +EXPORT_SYMBOL(switch_ipi_to_APIC_timer); + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; } #ifdef CONFIG_X86_MCE_AMD @@ -857,32 +880,10 @@ void setup_threshold_lvt(unsigned long l void smp_local_timer_interrupt(struct pt_regs *regs) { - int cpu = smp_processor_id(); - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. 
In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT(calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(regs)); #endif - } - /* * We take the 'long' return path, and there every subsystem * grabs the appropriate locks (kernel lock/ irq lock). Index: linux.prev/arch/x86_64/kernel/early_printk.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/early_printk.c +++ linux.prev/arch/x86_64/kernel/early_printk.c @@ -206,7 +206,7 @@ static int early_console_initialized = 0 void early_printk(const char *fmt, ...) { - char buf[512]; + static char buf[512]; int n; va_list ap; Index: linux.prev/arch/x86_64/kernel/entry.S =================================================================== --- linux.prev.orig/arch/x86_64/kernel/entry.S +++ linux.prev/arch/x86_64/kernel/entry.S @@ -48,6 +48,15 @@ #define retint_kernel retint_restore_args #endif +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define CALL_TRACE_IRQS_ON \ + push %rbp; \ + mov %rsp, %rbp; \ + call trace_irqs_on; \ + leaveq +#else +# define CALL_TRACE_IRQS_ON +#endif /* * C code is not supposed to know about undefined top of stack. 
Every time * a C function with an pt_regs argument is called from the SYSCALL based @@ -230,8 +239,8 @@ sysret_check: /* edx: work, edi: workmask */ sysret_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz sysret_signal sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -252,7 +261,7 @@ sysret_signal: leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 call ptregscall_common -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi jmp sysret_check badsys: @@ -319,8 +328,8 @@ int_with_check: /* First do a reschedule test. */ /* edx: work, edi: workmask */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz int_very_careful sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -353,7 +362,7 @@ int_signal: movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 call do_notify_resume -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi int_restore_rest: RESTORE_REST cli @@ -554,8 +563,8 @@ bad_iret: /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz retint_signal sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -577,7 +586,7 @@ retint_signal: call do_notify_resume RESTORE_REST cli - movl $_TIF_NEED_RESCHED,%edi + movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi GET_THREAD_INFO(%rcx) jmp retint_check @@ -593,6 +602,7 @@ retint_kernel: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? 
*/ jnc retint_restore_args call preempt_schedule_irq + CALL_TRACE_IRQS_ON jmp exit_intr #endif CFI_ENDPROC @@ -1041,3 +1051,41 @@ ENTRY(call_softirq) CFI_ADJUST_CFA_OFFSET -8 ret CFI_ENDPROC + +#ifdef CONFIG_LATENCY_TRACE + +ENTRY(mcount) + cmpq $0, trace_enabled + jz out + + push %rbp + mov %rsp,%rbp + + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + mov 0x0(%rbp),%rax + mov 0x8(%rbp),%rdi + mov 0x8(%rax),%rsi + + call __trace + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + + leaveq +out: + ret + +#endif + Index: linux.prev/arch/x86_64/kernel/genapic_flat.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/genapic_flat.c +++ linux.prev/arch/x86_64/kernel/genapic_flat.c @@ -50,8 +50,8 @@ static void flat_send_IPI_mask(cpumask_t unsigned long cfg; unsigned long flags; - local_save_flags(flags); - local_irq_disable(); + raw_local_save_flags(flags); + raw_local_irq_disable(); /* * Wait for idle. @@ -73,7 +73,7 @@ static void flat_send_IPI_mask(cpumask_t * Send the IPI. The write to APIC_ICR fires this off. 
*/ apic_write(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void flat_send_IPI_allbutself(int vector) Index: linux.prev/arch/x86_64/kernel/i8259.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/i8259.c +++ linux.prev/arch/x86_64/kernel/i8259.c @@ -127,7 +127,7 @@ void (*interrupt[NR_IRQS])(void) = { * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -448,7 +448,7 @@ device_initcall(i8259A_init_sysfs); * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init init_ISA_irqs (void) { Index: linux.prev/arch/x86_64/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/init_task.c +++ linux.prev/arch/x86_64/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/x86_64/kernel/io_apic.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/io_apic.c +++ linux.prev/arch/x86_64/kernel/io_apic.c @@ -46,7 +46,7 @@ static int no_timer_check; int disable_timer_pin_1 __initdata; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * # of IRQ routing registers @@ -94,6 +94,9 @@ int vector_irq[NR_VECTORS] __read_mostly 
reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ + /* Force POST flush by reading: */ \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ @@ -160,10 +163,8 @@ static void add_pin_to_irq(unsigned int static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x00010000, ) /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ static void mask_IO_APIC_irq (unsigned int irq) { @@ -1338,7 +1339,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1431,12 +1432,50 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +/* + * In the preemptible case mask and ack the IRQ first, then handle it and unmask it when done. + * + * (In the non-preemptible case we keep the IRQ unacked in the local APIC + * and don't need to do the masking, because the code executes atomically.) 
+ */ +#ifdef CONFIG_PREEMPT_HARDIRQS + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +static void end_level_ioapic_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & IRQ_INPROGRESS)) + unmask_IO_APIC_irq(irq); +} + +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} + +#else /* !CONFIG_PREEMPT_HARDIRQS */ + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + static void end_level_ioapic_irq (unsigned int irq) { move_irq(irq); ack_APIC_irq(); } +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} +#endif /* !CONFIG_PREEMPT_HARDIRQS */ + #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1460,6 +1499,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); @@ -1468,6 +1514,11 @@ static void end_level_ioapic_vector (uns end_level_ioapic_irq(irq); } +static void enable_level_ioapic_vector(unsigned int vector) +{ + enable_level_ioapic_irq(vector_to_irq(vector)); +} + static void mask_IO_APIC_vector (unsigned int vector) { int irq = vector_to_irq(vector); Index: linux.prev/arch/x86_64/kernel/irq.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/irq.c +++ linux.prev/arch/x86_64/kernel/irq.c @@ -129,9 +129,9 @@ void fixup_irqs(cpumask_t map) } /* That doesn't seem sufficient. Give it 1ms. 
*/ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); } #endif @@ -145,11 +145,11 @@ asmlinkage void do_softirq(void) if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); pending = local_softirq_pending(); /* Switch to interrupt stack */ if (pending) call_softirq(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); Index: linux.prev/arch/x86_64/kernel/machine_kexec.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/machine_kexec.c +++ linux.prev/arch/x86_64/kernel/machine_kexec.c @@ -190,7 +190,7 @@ NORET_TYPE void machine_kexec(struct kim relocate_new_kernel_t rnk; /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); + raw_local_irq_disable(); /* Calculate the offsets */ page_list = image->head; Index: linux.prev/arch/x86_64/kernel/nmi.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/nmi.c +++ linux.prev/arch/x86_64/kernel/nmi.c @@ -43,7 +43,7 @@ * This is maintained separately from nmi_active because the NMI * watchdog may also be driven from the I/O APIC timer. */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); +static DEFINE_RAW_SPINLOCK(lapic_nmi_owner_lock); static unsigned int lapic_nmi_owner; #define LAPIC_NMI_WATCHDOG (1<<0) #define LAPIC_NMI_RESERVED (1<<1) @@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + raw_local_irq_enable(); /* Intentionally don't use cpu_relax here. 
This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -156,7 +156,7 @@ int __init check_nmi_watchdog (void) for (cpu = 0; cpu < NR_CPUS; cpu++) counts[cpu] = cpu_pda[cpu].__nmi_count; - local_irq_enable(); + raw_local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { @@ -466,12 +466,42 @@ void touch_nmi_watchdog (void) touch_softlockup_watchdog(); } +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} + +static DEFINE_RAW_SPINLOCK(nmi_print_lock); + void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = safe_smp_processor_id(); sum = read_pda(apic_timer_irqs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -483,6 +513,11 @@ void nmi_watchdog_tick (struct pt_regs * */ local_inc(&__get_cpu_var(alert_counter)); if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { + int i; + + for (i = 0; i < NR_CPUS; i++) + nmi_show_regs[i] = 1; + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { local_set(&__get_cpu_var(alert_counter), 0); Index: linux.prev/arch/x86_64/kernel/pmtimer.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/pmtimer.c +++ /dev/null @@ -1,101 +0,0 @@ -/* Ported over from i386 by AK, original copyright was: - * - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * 
southbridges as primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. - * - * Dropped all the hardware bug workarounds for now. Hopefully they - * are not needed on 64bit chipsets. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The I/O port the PMTMR resides at. - * The location is detected during setup_arch(), - * in arch/i386/kernel/acpi/boot.c */ -u32 pmtmr_ioport; - -/* value of the Power timer at last timer interrupt */ -static u32 offset_delay; -static u32 last_pmtmr_tick; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. 
- */ - cycles *= 286; - return (cycles >> 10); -} - -int pmtimer_mark_offset(void) -{ - static int first_run = 1; - unsigned long tsc; - u32 lost; - - u32 tick = inl(pmtmr_ioport); - u32 delta; - - delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); - - last_pmtmr_tick = tick; - monotonic_base += delta * NSEC_PER_USEC; - - delta += offset_delay; - - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - rdtscll(tsc); - vxtime.last_tsc = tsc - offset_delay * cpu_khz; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } - - return lost - 1; -} - -unsigned int do_gettimeoffset_pm(void) -{ - u32 now, offset, delta = 0; - - offset = last_pmtmr_tick; - now = inl(pmtmr_ioport); - delta = (now - offset) & ACPI_PM_MASK; - - return offset_delay + cyc2us(delta); -} - - -static int __init nopmtimer_setup(char *s) -{ - pmtmr_ioport = 0; - return 0; -} - -__setup("nopmtimer", nopmtimer_setup); Index: linux.prev/arch/x86_64/kernel/process.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/process.c +++ linux.prev/arch/x86_64/kernel/process.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,12 @@ static atomic_t hlt_counter = ATOMIC_INI unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_SPINLOCK(pm_idle_switch_lock); +EXPORT_SYMBOL_GPL(pm_idle_switch_lock); + +int pm_idle_locked = 0; +EXPORT_SYMBOL_GPL(pm_idle_locked); + /* * Powermanagement idle function, if any.. 
*/ @@ -86,21 +93,21 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); if (!atomic_read(&hlt_counter)) { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + while (!need_resched() && !need_resched_delayed()) { + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); + raw_local_irq_enable(); } set_thread_flag(TIF_POLLING_NRFLAG); } else { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) cpu_relax(); } } @@ -110,9 +117,9 @@ void default_idle(void) * to poll the ->need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +void poll_idle (void) { - local_irq_enable(); + raw_local_irq_enable(); asm volatile( "2:" @@ -188,7 +195,9 @@ void cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -200,12 +209,15 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + stop_critical_timing(); + propagate_preempt_locks_value(); idle(); } - - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } } @@ -218,12 +230,12 @@ void cpu_idle (void) */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { __monitor((void *)&current_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); } @@ -314,7 +326,7 @@ void show_regs(struct pt_regs 
*regs) { printk("CPU %d:", smp_processor_id()); __show_regs(regs); - show_trace(&regs->rsp); + show_trace(current, &regs->rsp); } /* @@ -333,13 +345,14 @@ void exit_thread(void) kprobe_flush_task(me); if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; /* * Careful, clear this in the TSS too: */ + tss = &per_cpu(init_tss, get_cpu()); memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; put_cpu(); Index: linux.prev/arch/x86_64/kernel/reboot.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/reboot.c +++ linux.prev/arch/x86_64/kernel/reboot.c @@ -99,7 +99,7 @@ void machine_shutdown(void) smp_send_stop(); #endif - local_irq_save(flags); + raw_local_irq_save(flags); #ifndef CONFIG_SMP disable_local_APIC(); @@ -107,7 +107,7 @@ void machine_shutdown(void) disable_IO_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void machine_emergency_restart(void) Index: linux.prev/arch/x86_64/kernel/setup.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/setup.c +++ linux.prev/arch/x86_64/kernel/setup.c @@ -993,6 +993,7 @@ static void __cpuinit init_intel(struct c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 >= 15) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); Index: linux.prev/arch/x86_64/kernel/signal.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/signal.c +++ linux.prev/arch/x86_64/kernel/signal.c @@ -434,6 +434,13 @@ int do_signal(struct pt_regs *regs, sigs siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + 
preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux.prev/arch/x86_64/kernel/smp.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/smp.c +++ linux.prev/arch/x86_64/kernel/smp.c @@ -297,10 +297,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -455,9 +465,9 @@ void smp_stop_cpu(void) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_save(flags); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void smp_really_stop_cpu(void *dummy) @@ -481,9 +491,9 @@ void smp_send_stop(void) if (!nolock) spin_unlock(&call_lock); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux.prev/arch/x86_64/kernel/smpboot.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/smpboot.c +++ linux.prev/arch/x86_64/kernel/smpboot.c @@ -200,7 +200,7 @@ static void __cpuinit smp_store_cpu_info latency and low latency is the primary objective here. 
-AK */ #define no_cpu_relax() barrier() -static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); +static __cpuinitdata __DEFINE_RAW_SPINLOCK(tsc_sync_lock); static volatile __cpuinitdata unsigned long go[SLAVE + 1]; static int notscsync __cpuinitdata; @@ -216,7 +216,7 @@ static __cpuinit void sync_master(void * go[MASTER] = 0; - local_irq_save(flags); + raw_local_irq_save(flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { while (!go[MASTER]) @@ -225,7 +225,7 @@ static __cpuinit void sync_master(void * rdtscll(go[SLAVE]); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -335,7 +335,13 @@ static __cpuinit void sync_tsc(unsigned static void __cpuinit tsc_sync_wait(void) { - if (notscsync || !cpu_has_tsc) + /* + * When the CPU has synchronized TSCs assume the BIOS + * or the hardware already synced. Otherwise we could + * mess up a possible perfect synchronization with a + * not-quite-perfect algorithm. + */ + if (notscsync || !cpu_has_tsc || !unsynchronized_tsc()) return; sync_tsc(0); } @@ -1080,7 +1086,7 @@ int __cpuinit __cpu_up(unsigned int cpu) int err; int apicid = cpu_present_to_apicid(cpu); - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); Index: linux.prev/arch/x86_64/kernel/switch2poll.c =================================================================== --- /dev/null +++ linux.prev/arch/x86_64/kernel/switch2poll.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include + +extern void poll_idle (void); + +#define KERNEL_ATTR_RW(_name) \ +static struct subsys_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +static struct idlep_kobject +{ + struct kobject kobj; + int is_poll; + void (*idle)(void); +} idle_kobj; + +static ssize_t idle_poll_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%s\n", (idle_kobj.is_poll ? 
"on" : "off")); +} + +static ssize_t idle_poll_store(struct subsystem *subsys, + const char *buf, size_t len) +{ + unsigned long flags; + + spin_lock_irqsave(&pm_idle_switch_lock, flags); + + /* + * If power management is handling the idle function, + * then leave it be. + */ + if (pm_idle_locked) { + len = -EBUSY; + goto out; + } + + if (strncmp(buf,"1",1)==0 || + (len >=2 && strncmp(buf,"on",2)==0)) { + if (idle_kobj.is_poll != 1) { + idle_kobj.is_poll = 1; + boot_option_idle_override = 1; + idle_kobj.idle = pm_idle; + pm_idle = poll_idle; + } + } else if (strncmp(buf,"0",1)==0 || + (len >= 3 && strncmp(buf,"off",3)==0)) { + if (idle_kobj.is_poll != 0) { + boot_option_idle_override = 0; + idle_kobj.is_poll = 0; + pm_idle = idle_kobj.idle; + } + } + +out: + spin_unlock_irqrestore(&pm_idle_switch_lock, flags); + + return len; +} + + +KERNEL_ATTR_RW(idle_poll); + +static struct attribute * idle_attrs[] = { + &idle_poll_attr.attr, + NULL +}; + +static struct attribute_group idle_attr_group = { + .attrs = idle_attrs, +}; + +static int __init idle_poll_set_init(void) +{ + int err; + + /* + * If the default is already poll_idle then + * don't even bother with this. 
+ */ + if (pm_idle == poll_idle) + return 0; + + memset(&idle_kobj, 0, sizeof(idle_kobj)); + + idle_kobj.is_poll = 0; + idle_kobj.idle = pm_idle; + + err = kobject_set_name(&idle_kobj.kobj, "%s", "idle"); + if (err) + goto out; + + idle_kobj.kobj.parent = &kernel_subsys.kset.kobj; + err = kobject_register(&idle_kobj.kobj); + if (err) + goto out; + + err = sysfs_create_group(&idle_kobj.kobj, + &idle_attr_group); + if (err) + goto out; + + return 0; +out: + printk(KERN_INFO "Problem setting up sysfs idle_poll\n"); + return 0; +} + +late_initcall(idle_poll_set_init); Index: linux.prev/arch/x86_64/kernel/time.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/time.c +++ linux.prev/arch/x86_64/kernel/time.c @@ -26,6 +26,7 @@ #include #include #include + #ifdef CONFIG_ACPI #include /* for PM timer frequency */ #endif @@ -38,25 +39,26 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif -#ifdef CONFIG_CPU_FREQ -static void cpufreq_delayed_get(void); -#endif extern void i8254_timer_resume(void); extern int using_apic_timer; -DEFINE_SPINLOCK(rtc_lock); -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); static int nohpet __initdata = 0; static int notsc __initdata = 0; #undef HPET_HACK_ENABLE_DANGEROUS -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +unsigned int cpu_khz; /* CPU clocks / usec, not used here */ +unsigned int tsc_khz; /* TSC clocks / usec, not used here */ +unsigned long hpet_address; static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ static int hpet_use_timer; /* Use