summaryrefslogtreecommitdiff
path: root/net/core/drop_monitor.c
AgeCommit message (Collapse)Author
2017-11-21treewide: setup_timer() -> timer_setup()Kees Cook
This converts all remaining cases of the old setup_timer() API into using timer_setup(), where the callback argument is the structure already holding the struct timer_list. These should have no behavioral changes, since they just change which pointer is passed into the callback with the same available pointers after conversion. It handles the following examples, in addition to some other variations. Casting from unsigned long: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... setup_timer(&ptr->my_timer, my_callback, ptr); and forced object casts: void my_callback(struct something *ptr) { ... } ... setup_timer(&ptr->my_timer, my_callback, (unsigned long)ptr); become: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... timer_setup(&ptr->my_timer, my_callback, 0); Direct function assignments: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... ptr->my_timer.function = my_callback; have a temporary cast added, along with converting the args: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... ptr->my_timer.function = (TIMER_FUNC_TYPE)my_callback; And finally, callbacks without a data assignment: void my_callback(unsigned long data) { ... } ... setup_timer(&ptr->my_timer, my_callback, 0); have their argument renamed to verify they're unused during conversion: void my_callback(struct timer_list *unused) { ... } ... timer_setup(&ptr->my_timer, my_callback, 0); The conversion is done with the following Coccinelle script: spatch --very-quiet --all-includes --include-headers \ -I ./arch/x86/include -I ./arch/x86/include/generated \ -I ./include -I ./arch/x86/include/uapi \ -I ./arch/x86/include/generated/uapi -I ./include/uapi \ -I ./include/generated/uapi --include ./include/linux/kconfig.h \ --dir . \ --cocci-file ~/src/data/timer_setup.cocci @fix_address_of@ expression e; @@ setup_timer( -&(e) +&e , ...) // Update any raw setup_timer() usages that have a NULL callback, but // would otherwise match change_timer_function_usage, since the latter // will update all function assignments done in the face of a NULL // function initialization in setup_timer(). @change_timer_function_usage_NULL@ expression _E; identifier _timer; type _cast_data; @@ ( -setup_timer(&_E->_timer, NULL, _E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E->_timer, NULL, (_cast_data)_E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E._timer, NULL, &_E); +timer_setup(&_E._timer, NULL, 0); | -setup_timer(&_E._timer, NULL, (_cast_data)&_E); +timer_setup(&_E._timer, NULL, 0); ) @change_timer_function_usage@ expression _E; identifier _timer; struct timer_list _stl; identifier _callback; type _cast_func, _cast_data; @@ ( -setup_timer(&_E->_timer, _callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | _E->_timer@_stl.function = _callback; | _E->_timer@_stl.function = &_callback; | _E->_timer@_stl.function = (_cast_func)_callback; | _E->_timer@_stl.function = (_cast_func)&_callback; | _E._timer@_stl.function = _callback; | _E._timer@_stl.function = &_callback; | _E._timer@_stl.function = (_cast_func)_callback; | _E._timer@_stl.function = (_cast_func)&_callback; ) // callback(unsigned long arg) @change_callback_handle_cast depends on change_timer_function_usage@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; identifier _handle; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { ( ... when != _origarg _handletype *_handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg ) } // callback(unsigned long arg) without existing variable @change_callback_handle_cast_no_arg depends on change_timer_function_usage && !change_callback_handle_cast@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { + _handletype *_origarg = from_timer(_origarg, t, _timer); + ... when != _origarg - (_handletype *)_origarg + _origarg ... when != _origarg } // Avoid already converted callbacks. @match_callback_converted depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier t; @@ void _callback(struct timer_list *t) { ... } // callback(struct something *handle) @change_callback_handle_arg depends on change_timer_function_usage && !match_callback_converted && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; @@ void _callback( -_handletype *_handle +struct timer_list *t ) { + _handletype *_handle = from_timer(_handle, t, _timer); ... } // If change_callback_handle_arg ran on an empty function, remove // the added handler. @unchange_callback_handle_arg depends on change_timer_function_usage && change_callback_handle_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; identifier t; @@ void _callback(struct timer_list *t) { - _handletype *_handle = from_timer(_handle, t, _timer); } // We only want to refactor the setup_timer() data argument if we've found // the matching callback. This undoes changes in change_timer_function_usage. @unchange_timer_function_usage depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg && !change_callback_handle_arg@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type change_timer_function_usage._cast_data; @@ ( -timer_setup(&_E->_timer, _callback, 0); +setup_timer(&_E->_timer, _callback, (_cast_data)_E); | -timer_setup(&_E._timer, _callback, 0); +setup_timer(&_E._timer, _callback, (_cast_data)&_E); ) // If we fixed a callback from a .function assignment, fix the // assignment cast now. @change_timer_function_assignment depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_func; typedef TIMER_FUNC_TYPE; @@ ( _E->_timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -&_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)_callback; +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -&_callback; +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; ) // Sometimes timer functions are called directly. Replace matched args. @change_timer_function_calls depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression _E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_data; @@ _callback( ( -(_cast_data)_E +&_E->_timer | -(_cast_data)&_E +&_E._timer | -_E +&_E->_timer ) ) // If a timer has been configured without a data argument, it can be // converted without regard to the callback argument, since it is unused. @match_timer_function_unused_data@ expression _E; identifier _timer; identifier _callback; @@ ( -setup_timer(&_E->_timer, _callback, 0); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0L); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0UL); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0L); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0UL); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_timer, _callback, 0); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0L); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0UL); +timer_setup(&_timer, _callback, 0); | -setup_timer(_timer, _callback, 0); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0L); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0UL); +timer_setup(_timer, _callback, 0); ) @change_callback_unused_data depends on match_timer_function_unused_data@ identifier match_timer_function_unused_data._callback; type _origtype; identifier _origarg; @@ void _callback( -_origtype _origarg +struct timer_list *unused ) { ... when != _origarg } Signed-off-by: Kees Cook <keescook@chromium.org>
2017-03-12drop_monitor: use setup_timerGeliang Tang
Use setup_timer() instead of init_timer() to simplify the code. Signed-off-by: Geliang Tang <geliangtang@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-01-03drop_monitor: consider inserted data in genlmsg_endReiter Wolfgang
Final nlmsg_len field update must reflect inserted net_dm_drop_point data. This patch depends on previous patch: "drop_monitor: add missing call to genlmsg_end" Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-01-01drop_monitor: add missing call to genlmsg_endReiter Wolfgang
Update nlmsg_len field with genlmsg_end to enable userspace processing using nlmsg_next helper. Also adds error handling. Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-27genetlink: mark families as __ro_after_initJohannes Berg
Now genl_register_family() is the only thing (other than the users themselves, perhaps, but I didn't find any doing that) writing to the family struct. In all families that I found, genl_register_family() is only called from __init functions (some indirectly, in which case I've add __init annotations to clarifly things), so all can actually be marked __ro_after_init. This protects the data structure from accidental corruption. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-27genetlink: statically initialize familiesJohannes Berg
Instead of providing macros/inline functions to initialize the families, make all users initialize them statically and get rid of the macros. This reduces the kernel code size by about 1.6k on x86-64 (with allyesconfig). Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-27genetlink: no longer support using static family IDsJohannes Berg
Static family IDs have never really been used, the only use case was the workaround I introduced for those users that assumed their family ID was also their multicast group ID. Additionally, because static family IDs would never be reserved by the generic netlink code, using a relatively low ID would only work for built-in families that can be registered immediately after generic netlink is started, which is basically only the control family (apart from the workaround code, which I also had to add code for so it would reserve those IDs) Thus, anything other than GENL_ID_GENERATE is flawed and luckily not used except in the cases I mentioned. Move those workarounds into a few lines of code, and then get rid of GENL_ID_GENERATE entirely, making it more robust. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-01drop_monitor: make genl_multicast_group conststephen hemminger
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-07-09net: tracepoint napi:napi_poll add work and budgetJesper Dangaard Brouer
An important information for the napi_poll tracepoint is knowing the work done (packets processed) by the napi_poll() call. Add both the work done and budget, as they are related. Handle trace_napi_poll() param change in dropwatch/drop_monitor and in python perf script netdev-times.py in backward compat way, as python fortunately supports optional parameter handling. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-08-26net: Replace get_cpu_var through this_cpu_ptrChristoph Lameter
Replace uses of get_cpu_var for address calculation through this_cpu_ptr. Cc: netdev@vger.kernel.org Cc: Eric Dumazet <edumazet@google.com> Acked-by: David S. Miller <davem@davemloft.net> Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2014-07-15drop_monitor: remove unnecessary break after returnFabian Frederick
Signed-off-by: Fabian Frederick <fabf@skynet.be> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-09net: drop_monitor: fix the value of maxattrChangli Gao
maxattr in genl_family should be used to save the max attribute type, but not the max command type. Drop monitor doesn't support any attributes, so we should leave it as zero. Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-19genetlink: make multicast groups const, prevent abuseJohannes Berg
Register generic netlink multicast groups as an array with the family and give them contiguous group IDs. Then instead of passing the global group ID to the various functions that send messages, pass the ID relative to the family - for most families that's just 0 because the only have one group. This avoids the list_head and ID in each group, adding a new field for the mcast group ID offset to the family. At the same time, this allows us to prevent abusing groups again like the quota and dropmon code did, since we can now check that a family only uses a group it owns. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-19genetlink: pass family to functions using groupsJohannes Berg
This doesn't really change anything, but prepares for the next patch that will change the APIs to pass the group ID within the family, rather than the global group ID. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-19drop_monitor/genetlink: use proper genetlink multicast APIsJohannes Berg
The drop monitor code is abusing the genetlink API and is statically using the generic netlink multicast group 1, even if that group belongs to somebody else (which it invariably will, since it's not reserved.) Make the drop monitor code use the proper APIs to reserve a group ID, but also reserve the group id 1 in generic netlink code to preserve the userspace API. Since drop monitor can be a module, don't clear the bit for it on unregistration. Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-19genetlink: only pass array to genl_register_family_with_ops()Johannes Berg
As suggested by David Miller, make genl_register_family_with_ops() a macro and pass only the array, evaluating ARRAY_SIZE() in the macro, this is a little safer. The openvswitch has some indirection, assing ops/n_ops directly in that code. This might ultimately just assign the pointers in the family initializations, saving the struct genl_family_and_ops and code (once mcast groups are handled differently.) Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-14genetlink: make all genl_ops users constJohannes Berg
Now that genl_ops are no longer modified in place when registering, they can be made const. This patch was done mostly with spatch: @@ identifier ops; @@ +const struct genl_ops ops[] = { ... }; (except the struct thing in net/openvswitch/datapath.c) Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-05-28net: pass info struct via netdevice notifierJiri Pirko
So far, only net_device * could be passed along with netdevice notifier event. This patch provides a possibility to pass custom structure able to provide info that event listener needs to know. Signed-off-by: Jiri Pirko <jiri@resnulli.us> v2->v3: fix typo on simeth shortened dev_getter shortened notifier_info struct name v1->v2: fix notifier_call parameter in call_netdevice_notifier() Signed-off-by: David S. Miller <davem@davemloft.net>
2012-06-04drop_monitor: dont sleep in atomic contextEric Dumazet
drop_monitor calls several sleeping functions while in atomic context. BUG: sleeping function called from invalid context at mm/slub.c:943 in_atomic(): 1, irqs_disabled(): 0, pid: 2103, name: kworker/0:2 Pid: 2103, comm: kworker/0:2 Not tainted 3.5.0-rc1+ #55 Call Trace: [<ffffffff810697ca>] __might_sleep+0xca/0xf0 [<ffffffff811345a3>] kmem_cache_alloc_node+0x1b3/0x1c0 [<ffffffff8105578c>] ? queue_delayed_work_on+0x11c/0x130 [<ffffffff815343fb>] __alloc_skb+0x4b/0x230 [<ffffffffa00b0360>] ? reset_per_cpu_data+0x160/0x160 [drop_monitor] [<ffffffffa00b022f>] reset_per_cpu_data+0x2f/0x160 [drop_monitor] [<ffffffffa00b03ab>] send_dm_alert+0x4b/0xb0 [drop_monitor] [<ffffffff810568e0>] process_one_work+0x130/0x4c0 [<ffffffff81058249>] worker_thread+0x159/0x360 [<ffffffff810580f0>] ? manage_workers.isra.27+0x240/0x240 [<ffffffff8105d403>] kthread+0x93/0xa0 [<ffffffff816be6d4>] kernel_thread_helper+0x4/0x10 [<ffffffff8105d370>] ? kthread_freezable_should_stop+0x80/0x80 [<ffffffff816be6d0>] ? gs_change+0xb/0xb Rework the logic to call the sleeping functions in right context. Use standard timer/workqueue api to let system chose any cpu to perform the allocation and netlink send. Also avoid a loop if reset_per_cpu_data() cannot allocate memory : use mod_timer() to wait 1/10 second before next try. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Neil Horman <nhorman@tuxdriver.com> Reviewed-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-29drop_monitor: Add module alias to enable automatic module loadingNeil Horman
Now that we have module alias macros for generic netlink families, lets use those to mark modules with the appropriate family names for loading Signed-off-by: Neil Horman <nhorman@tuxdriver.com> CC: Eric Dumazet <eric.dumazet@gmail.com> CC: David Miller <davem@davemloft.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-17drop_monitor: convert to modular buildingNeil Horman
When I first wrote drop monitor I wrote it to just build monolithically. There is no reason it can't be built modularly as well, so lets give it that flexibiity. I've tested this by building it as both a module and monolithically, and it seems to work quite well Change notes: v2) * fixed for_each_present_cpu loops to be more correct as per Eric D. * Converted exit path failures to BUG_ON as per Ben H. v3) * Converted del_timer to del_timer_sync to close race noted by Ben H. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> CC: "David S. Miller" <davem@davemloft.net> CC: Eric Dumazet <eric.dumazet@gmail.com> CC: Ben Hutchings <bhutchings@solarflare.com> Reviewed-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-17net: core: Use pr_<level>Joe Perches
Use the current logging style. This enables use of dynamic debugging as well. Convert printk(KERN_<LEVEL> to pr_<level>. Add pr_fmt. Remove embedded prefixes, use %s, __func__ instead. Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-05-02drop_monitor: prevent init path from scheduling on the wrong cpuNeil Horman
I just noticed after some recent updates, that the init path for the drop monitor protocol has a minor error. drop monitor maintains a per cpu structure, that gets initalized from a single cpu. Normally this is fine, as the protocol isn't in use yet, but I recently made a change that causes a failed skb allocation to reschedule itself . Given the current code, the implication is that this workqueue reschedule will take place on the wrong cpu. If drop monitor is used early during the boot process, its possible that two cpus will access a single per-cpu structure in parallel, possibly leading to data corruption. This patch fixes the situation, by storing the cpu number that a given instance of this per-cpu data should be accessed from. In the case of a need for a reschedule, the cpu stored in the struct is assigned the rescheule, rather than the currently executing cpu Tested successfully by myself. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> CC: David Miller <davem@davemloft.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-28drop_monitor: Make updating data->skb smp safeNeil Horman
Eric Dumazet pointed out to me that the drop_monitor protocol has some holes in its smp protections. Specifically, its possible to replace data->skb while its being written. This patch corrects that by making data->skb an rcu protected variable. That will prevent it from being overwritten while a tracepoint is modifying it. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Reported-by: Eric Dumazet <eric.dumazet@gmail.com> CC: David Miller <davem@davemloft.net> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-28drop_monitor: fix sleeping in invalid context warningNeil Horman
Eric Dumazet pointed out this warning in the drop_monitor protocol to me: [ 38.352571] BUG: sleeping function called from invalid context at kernel/mutex.c:85 [ 38.352576] in_atomic(): 1, irqs_disabled(): 0, pid: 4415, name: dropwatch [ 38.352580] Pid: 4415, comm: dropwatch Not tainted 3.4.0-rc2+ #71 [ 38.352582] Call Trace: [ 38.352592] [<ffffffff8153aaf0>] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352599] [<ffffffff81063f2a>] __might_sleep+0xca/0xf0 [ 38.352606] [<ffffffff81655b16>] mutex_lock+0x26/0x50 [ 38.352610] [<ffffffff8153aaf0>] ? trace_napi_poll_hit+0xd0/0xd0 [ 38.352616] [<ffffffff810b72d9>] tracepoint_probe_register+0x29/0x90 [ 38.352621] [<ffffffff8153a585>] set_all_monitor_traces+0x105/0x170 [ 38.352625] [<ffffffff8153a8ca>] net_dm_cmd_trace+0x2a/0x40 [ 38.352630] [<ffffffff8154a81a>] genl_rcv_msg+0x21a/0x2b0 [ 38.352636] [<ffffffff810f8029>] ? zone_statistics+0x99/0xc0 [ 38.352640] [<ffffffff8154a600>] ? genl_rcv+0x30/0x30 [ 38.352645] [<ffffffff8154a059>] netlink_rcv_skb+0xa9/0xd0 [ 38.352649] [<ffffffff8154a5f0>] genl_rcv+0x20/0x30 [ 38.352653] [<ffffffff81549a7e>] netlink_unicast+0x1ae/0x1f0 [ 38.352658] [<ffffffff81549d76>] netlink_sendmsg+0x2b6/0x310 [ 38.352663] [<ffffffff8150824f>] sock_sendmsg+0x10f/0x130 [ 38.352668] [<ffffffff8150abe0>] ? move_addr_to_kernel+0x60/0xb0 [ 38.352673] [<ffffffff81515f04>] ? verify_iovec+0x64/0xe0 [ 38.352677] [<ffffffff81509c46>] __sys_sendmsg+0x386/0x390 [ 38.352682] [<ffffffff810ffaf9>] ? handle_mm_fault+0x139/0x210 [ 38.352687] [<ffffffff8165b5bc>] ? do_page_fault+0x1ec/0x4f0 [ 38.352693] [<ffffffff8106ba4d>] ? set_next_entity+0x9d/0xb0 [ 38.352699] [<ffffffff81310b49>] ? tty_ldisc_deref+0x9/0x10 [ 38.352703] [<ffffffff8106d363>] ? pick_next_task_fair+0x63/0x140 [ 38.352708] [<ffffffff8150b8d4>] sys_sendmsg+0x44/0x80 [ 38.352713] [<ffffffff8165f8e2>] system_call_fastpath+0x16/0x1b It stems from holding a spinlock (trace_state_lock) while attempting to register or unregister tracepoint hooks, making in_atomic() true in this context, leading to the warning when the tracepoint calls might_sleep() while its taking a mutex. Since we only use the trace_state_lock to prevent trace protocol state races, as well as hardware stat list updates on an rcu write side, we can just convert the spinlock to a mutex to avoid this problem. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Reported-by: Eric Dumazet <eric.dumazet@gmail.com> CC: David Miller <davem@davemloft.net> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-21drop_monitor: allow more events per secondEric Dumazet
It seems there is a logic error in trace_drop_common(), since we store only 64 drops, even if they are from same location. This fix is a one liner, but we probably need more work to avoid useless atomic dec/inc Now I can watch 1 Mpps drops through dropwatch... Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Neil Horman <nhorman@tuxdriver.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-05-07net,rcu: convert call_rcu(free_dm_hw_stat) to kfree_rcu()Lai Jiangshan
The rcu callback free_dm_hw_stat() just calls a kfree(), so we use kfree_rcu() instead of the call_rcu(free_dm_hw_stat). Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Acked-by: David S. Miller <davem@davemloft.net> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org>
2011-03-21net: fix incorrect spelling in drop monitor protocolNeil Horman
It was pointed out to me recently that my spelling could be better :) Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-07-26drop_monitor: use genl_register_family_with_ops()Changli Gao
[ Fix unused local variable build warnings. -DaveM ] Signed-off-by: Changli Gao <xiaosuo@gmail.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-07-20drop_monitor: Add error code to detect duplicate state changesNeil Horman
Patch to add -EAGAIN error to dropwatch netlink message handling code. -EAGAIN will be returned anytime userspace attempts to transition the state of the drop monitor service to a state that its already in. That allows user space to detect this condition, so it doesn't wait for a success ACK that will never arrive. Tested successfully by me Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-05-14tracing: Let tracepoints have data passed to tracepoint callbacksSteven Rostedt
This patch adds data to be passed to tracepoint callbacks. The created functions from DECLARE_TRACE() now need a mandatory data parameter. For example: DECLARE_TRACE(mytracepoint, int value, value) Will create the register function: int register_trace_mytracepoint((void(*)(void *data, int value))probe, void *data); As the first argument, all callbacks (probes) must take a (void *data) parameter. So a callback for the above tracepoint will look like: void myprobe(void *data, int value) { } The callback may choose to ignore the data parameter. This change allows callbacks to register a private data pointer along with the function probe. void mycallback(void *data, int value); register_trace_mytracepoint(mycallback, mydata); Then the mycallback() will receive the "mydata" as the first parameter before the args. A more detailed example: DECLARE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); /* In the C file */ DEFINE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); [...] trace_mytracepoint(status); /* In a file registering this tracepoint */ int my_callback(void *data, int status) { struct my_struct my_data = data; [...] } [...] my_data = kmalloc(sizeof(*my_data), GFP_KERNEL); init_my_data(my_data); register_trace_mytracepoint(my_callback, my_data); The same callback can also be registered to the same tracepoint as long as the data registered is different. Note, the data must also be used to unregister the callback: unregister_trace_mytracepoint(my_callback, my_data); Because of the data parameter, tracepoints declared this way can not have no args. That is: DECLARE_TRACE(mytracepoint, TP_PROTO(void), TP_ARGS()); will cause an error. If no arguments are needed, a new macro can be used instead: DECLARE_TRACE_NOARGS(mytracepoint); Since there are no arguments, the proto and args fields are left out. This is part of a series to make the tracepoint footprint smaller: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class 4918492 1084612 861512 6864616 68bee8 vmlinux.tracepoint Again, this patch also increases the size of the kernel, but lays the ground work for decreasing it. v5: Fixed net/core/drop_monitor.c to handle these updates. v4: Moved the DECLARE_TRACE() DECLARE_TRACE_NOARGS out of the #ifdef CONFIG_TRACE_POINTS, since the two are the same in both cases. The __DECLARE_TRACE() is what changes. Thanks to Frederic Weisbecker for pointing this out. v3: Made all register_* functions require data to be passed and all callbacks to take a void * parameter as its first argument. This makes the calling functions comply with C standards. Also added more comments to the modifications of DECLARE_TRACE(). v2: Made the DECLARE_TRACE() have the ability to pass arguments and added a new DECLARE_TRACE_NOARGS() for tracepoints that do not need any arguments. Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Acked-by: Masami Hiramatsu <mhiramat@redhat.com> Acked-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Neil Horman <nhorman@tuxdriver.com> Cc: David S. Miller <davem@davemloft.net> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2010-03-30include cleanup: Update gfp.h and slab.h includes to prepare for breaking ↵Tejun Heo
implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-02-17net: remove INIT_RCU_HEAD() usageAlexey Dobriyan
call_rcu() will unconditionally reinitialize RCU head anyway. Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-07net: Replace old style lock initializerThomas Gleixner
SPIN_LOCK_UNLOCKED is deprecated. Use DEFINE_SPINLOCK instead. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-09-02net: drop_monitor: make last_rx timestamp privateNeil Horman
It was recently pointed out to me that the last_rx field of the net_device structure wasn't updated regularly. In fact only the bonding driver really uses it currently. Since the drop_monitor code relies on the last_rx field to detect drops on recevie in hardware, We need to find a more reliable way to rate limit our drop checks (so that we don't check for drops on every frame recevied, which would be inefficient. This patch makes a last_rx timestamp that is private to the drop monitor code and is updated for every device that we track. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-09-01drop_monitor: fix trace_napi_poll_hit()Xiao Guangrong
The net_dev of backlog napi is NULL, like below: __get_cpu_var(softnet_data).backlog.dev == NULL So, we should check it in napi tracepoint's probe function Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-06-15Merge branch 'master' of ↵David S. Miller
master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 Conflicts: Documentation/feature-removal-schedule.txt drivers/scsi/fcoe/fcoe.c net/core/drop_monitor.c net/core/net-traces.c
2009-05-21dropmon: add ability to detect when hardware dropsrxpacketsNeil Horman
Patch to add the ability to detect drops in hardware interfaces via dropwatch. Adds a tracepoint to net_rx_action to signal everytime a napi instance is polled. The dropmon code then periodically checks to see if the rx_frames counter has changed, and if so, adds a drop notification to the netlink protocol, using the reserved all-0's vector to indicate the drop location was in hardware, rather than somewhere in the code. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> include/linux/net_dropmon.h | 8 ++ include/trace/napi.h | 11 +++ net/core/dev.c | 5 + net/core/drop_monitor.c | 124 ++++++++++++++++++++++++++++++++++++++++++-- net/core/net-traces.c | 4 + net/core/netpoll.c | 2 6 files changed, 149 insertions(+), 5 deletions(-) Signed-off-by: David S. Miller <davem@davemloft.net>
2009-04-27drop_monitor: Update netlink protocol to include netlink attribute header in ↵Neil Horman
alert message When I initially implemented this protocol, I disregarded the use of netlink attribute headers, thinking for my purposes they weren't needed. I've come to find out that, as I'm starting to work with sending down messages with associated data (like config messages), the kernel code spits out warnings about trailing data in a netlink skb that doesn't have an associated header on it. As such, I'm going to start including attribute headers in my netlink transaction, and so for completeness, I should likely include them on messages bound from the kernel to user space. This patch adds that header to the kernel, and bumps the protocol version accordingly Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-04-14tracing/events: move trace point headers into include/trace/eventsSteven Rostedt
Impact: clean up Create a sub directory in include/trace called events to keep the trace point headers in their own separate directory. Only headers that declare trace points should be defined in this directory. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Neil Horman <nhorman@tuxdriver.com> Cc: Zhao Lei <zhaolei@cn.fujitsu.com> Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2009-03-13Network Drop Monitor: Adding drop monitor implementation & Netlink protocolNeil Horman
Signed-off-by: Neil Horman <nhorman@tuxdriver.com> include/linux/net_dropmon.h | 56 +++++++++ net/core/drop_monitor.c | 263 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 319 insertions(+) Signed-off-by: David S. Miller <davem@davemloft.net>