mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-26 14:17:26 -04:00
rcu: Direct algorithmic SRCU implementation
The current implementation of synchronize_srcu_expedited() can cause severe OS jitter due to its use of synchronize_sched(), which in turn invokes try_stop_cpus(), which causes each CPU to be sent an IPI. This can result in severe performance degradation for real-time workloads and especially for short-interation-length HPC workloads. Furthermore, because only one instance of try_stop_cpus() can be making forward progress at a given time, only one instance of synchronize_srcu_expedited() can make forward progress at a time, even if they are all operating on distinct srcu_struct structures. This commit, inspired by an earlier implementation by Peter Zijlstra (https://lkml.org/lkml/2012/1/31/211) and by further offline discussions, takes a strictly algorithmic bits-in-memory approach. This has the disadvantage of requiring one explicit memory-barrier instruction in each of srcu_read_lock() and srcu_read_unlock(), but on the other hand completely dispenses with OS jitter and furthermore allows SRCU to be used freely by CPUs that RCU believes to be idle or offline. The update-side implementation handles the single read-side memory barrier by rechecking the per-CPU counters after summing them and by running through the update-side state machine twice. This implementation has passed moderate rcutorture testing on both x86 and Power. Also updated to use this_cpu_ptr() instead of per_cpu_ptr(), as suggested by Peter Zijlstra. Reported-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
This commit is contained in:
parent
fae4b54f28
commit
cef50120b6
3 changed files with 200 additions and 100 deletions
|
@ -31,13 +31,19 @@
|
|||
#include <linux/rcupdate.h>
|
||||
|
||||
struct srcu_struct_array {
|
||||
int c[2];
|
||||
unsigned long c[2];
|
||||
};
|
||||
|
||||
/* Bit definitions for field ->c above and ->snap below. */
|
||||
#define SRCU_USAGE_BITS 2
|
||||
#define SRCU_REF_MASK (ULONG_MAX >> SRCU_USAGE_BITS)
|
||||
#define SRCU_USAGE_COUNT (SRCU_REF_MASK + 1)
|
||||
|
||||
struct srcu_struct {
|
||||
int completed;
|
||||
unsigned completed;
|
||||
struct srcu_struct_array __percpu *per_cpu_ref;
|
||||
struct mutex mutex;
|
||||
unsigned long snap[NR_CPUS];
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
struct lockdep_map dep_map;
|
||||
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue