From f45b2974cc0ae959a4c503a071e38a56bd64372f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 17 Nov 2021 13:57:08 +0100 Subject: [PATCH 001/253] bpf, x86: Fix "no previous prototype" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The arch_prepare_bpf_dispatcher function does not have a prototype, and yields the following warning when W=1 is enabled for the kernel build. >> arch/x86/net/bpf_jit_comp.c:2188:5: warning: no previous \ prototype for 'arch_prepare_bpf_dispatcher' [-Wmissing-prototypes] 2188 | int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, \ int num_funcs) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ Remove the warning by adding a function declaration to include/linux/bpf.h. Fixes: 75ccbef6369e ("bpf: Introduce BPF dispatcher") Reported-by: kernel test robot Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211117125708.769168-1-bjorn@kernel.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e7a163a3146b..84ff6ef49462 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -732,6 +732,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); +int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ From 6966df483d7b5b218aeb0e13e7e334a8fc3c1744 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 18 Nov 2021 13:49:51 +0200 Subject: [PATCH 002/253] regulator: Update protection IRQ helper docs The documentation of IRQ notification helper had still references to first RFC implementation which called BUG() while trying to protect the hardware. Behaviour was improved as calling the BUG() was not a proper solution. Current implementation attempts to call poweroff if handling of potentially damaging error notification fails. Update the documentation to reflect the actual behaviour. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/0c9cc4bcf20c3da66fd5a85c97ee4288e5727538.1637233864.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bd7a73db2e66..54cf566616ae 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -499,7 +499,8 @@ struct regulator_irq_data { * best to shut-down regulator(s) or reboot the SOC if error * handling is repeatedly failing. If fatal_cnt is given the IRQ * handling is aborted if it fails for fatal_cnt times and die() - * callback (if populated) or BUG() is called to try to prevent + * callback (if populated) is called. If die() is not populated + * poweroff for the system is attempted in order to prevent any * further damage. * @reread_ms: The time which is waited before attempting to re-read status * at the worker if IC reading fails. Immediate re-read is done @@ -516,11 +517,12 @@ struct regulator_irq_data { * @data: Driver private data pointer which will be passed as such to * the renable, map_event and die callbacks in regulator_irq_data. * @die: Protection callback. If IC status reading or recovery actions - * fail fatal_cnt times this callback or BUG() is called. This - * callback should implement a final protection attempt like - * disabling the regulator. If protection succeeded this may - * return 0. If anything else is returned the core assumes final - * protection failed and calls BUG() as a last resort. + * fail fatal_cnt times this callback is called or system is + * powered off. This callback should implement a final protection + * attempt like disabling the regulator. If protection succeeded + * die() may return 0. If anything else is returned the core + * assumes final protection failed and attempts to perform a + * poweroff as a last resort. * @map_event: Driver callback to map IRQ status into regulator devices with * events / errors. NOTE: callback MUST initialize both the * errors and notifs for all rdevs which it signals having From 38207a5e81230d6ffbdd51e5fa5681be5116dcae Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Nov 2021 10:14:17 -0800 Subject: [PATCH 003/253] bpf, sockmap: Attach map progs to psock early for feature probes When a TCP socket is added to a sock map we look at the programs attached to the map to determine what proto op hooks need to be changed. Before the patch in the 'fixes' tag there were only two categories -- the empty set of programs or a TX policy. In any case the base set handled the receive case. After the fix we have an optimized program for receive that closes a small, but possible, race on receive. This program is loaded only when the map the psock is being added to includes a RX policy. Otherwise, the race is not possible so we don't need to handle the race condition. In order for the call to sk_psock_init() to correctly evaluate the above conditions all progs need to be set in the psock before the call. However, in the current code this is not the case. We end up evaluating the requirements on the old prog state. If your psock is attached to multiple maps -- for example a tx map and rx map -- then the second update would pull in the correct maps. But, the other pattern with a single rx enabled map the correct receive hooks are not used. The result is the race fixed by the patch in the fixes tag below may still be seen in this case. To fix we simply set all psock->progs before doing the call into sock_map_init(). With this the init() call gets the full list of programs and chooses the correct proto ops on the first iteration instead of requiring the second update to pull them in. This fixes the race case when only a single map is used. Fixes: c5d2177a72a16 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211119181418.353932-2-john.fastabend@gmail.com --- net/core/sock_map.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index f39ef79ced67..9b528c644fb7 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -282,6 +282,12 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) if (msg_parser) psock_set_prog(&psock->progs.msg_parser, msg_parser); + if (stream_parser) + psock_set_prog(&psock->progs.stream_parser, stream_parser); + if (stream_verdict) + psock_set_prog(&psock->progs.stream_verdict, stream_verdict); + if (skb_verdict) + psock_set_prog(&psock->progs.skb_verdict, skb_verdict); ret = sock_map_init_proto(sk, psock); if (ret < 0) @@ -292,14 +298,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) ret = sk_psock_init_strp(sk, psock); if (ret) goto out_unlock_drop; - psock_set_prog(&psock->progs.stream_verdict, stream_verdict); - psock_set_prog(&psock->progs.stream_parser, stream_parser); sk_psock_start_strp(sk, psock); } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) { - psock_set_prog(&psock->progs.stream_verdict, stream_verdict); sk_psock_start_verdict(sk,psock); } else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) { - psock_set_prog(&psock->progs.skb_verdict, skb_verdict); sk_psock_start_verdict(sk, psock); } write_unlock_bh(&sk->sk_callback_lock); From c0d95d3380ee099d735e08618c0d599e72f6c8b0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Nov 2021 10:14:18 -0800 Subject: [PATCH 004/253] bpf, sockmap: Re-evaluate proto ops when psock is removed from sockmap When a sock is added to a sock map we evaluate what proto op hooks need to be used. However, when the program is removed from the sock map we have not been evaluating if that changes the required program layout. Before the patch listed in the 'fixes' tag this was not causing failures because the base program set handles all cases. Specifically, the case with a stream parser and the case with out a stream parser are both handled. With the fix below we identified a race when running with a proto op that attempts to read skbs off both the stream parser and the skb->receive_queue. Namely, that a race existed where when the stream parser is empty checking the skb->receive_queue from recvmsg at the precies moment when the parser is paused and the receive_queue is not empty could result in skipping the stream parser. This may break a RX policy depending on the parser to run. The fix tag then loads a specific proto ops that resolved this race. But, we missed removing that proto ops recv hook when the sock is removed from the sockmap. The result is the stream parser is stopped so no more skbs will be aggregated there, but the hook and BPF program continues to be attached on the psock. User space will then get an EBUSY when trying to read the socket because the recvmsg() handler is now waiting on a stopped stream parser. To fix we rerun the proto ops init() function which will look at the new set of progs attached to the psock and rest the proto ops hook to the correct handlers. And in the above case where we remove the sock from the sock map the RX prog will no longer be listed so the proto ops is removed. Fixes: c5d2177a72a16 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211119181418.353932-3-john.fastabend@gmail.com --- net/core/skmsg.c | 5 +++++ net/core/sock_map.c | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 1ae52ac943f6..8eb671c827f9 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1124,6 +1124,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) { + psock_set_prog(&psock->progs.stream_parser, NULL); + if (!psock->saved_data_ready) return; @@ -1212,6 +1214,9 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) { + psock_set_prog(&psock->progs.stream_verdict, NULL); + psock_set_prog(&psock->progs.skb_verdict, NULL); + if (!psock->saved_data_ready) return; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9b528c644fb7..4ca4b11f4e5f 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -167,8 +167,11 @@ static void sock_map_del_link(struct sock *sk, write_lock_bh(&sk->sk_callback_lock); if (strp_stop) sk_psock_stop_strp(sk, psock); - else + if (verdict_stop) sk_psock_stop_verdict(sk, psock); + + if (psock->psock_update_sk_prot) + psock->psock_update_sk_prot(sk, psock, false); write_unlock_bh(&sk->sk_callback_lock); } } From b4d25abf9720b69a03465b09d0d62d1998ed6708 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 3 Nov 2021 15:31:08 -0700 Subject: [PATCH 005/253] drm/msm/a6xx: Allocate enough space for GMU registers In commit 142639a52a01 ("drm/msm/a6xx: fix crashstate capture for A650") we changed a6xx_get_gmu_registers() to read 3 sets of registers. Unfortunately, we didn't change the memory allocation for the array. That leads to a KASAN warning (this was on the chromeos-5.4 kernel, which has the problematic commit backported to it): BUG: KASAN: slab-out-of-bounds in _a6xx_get_gmu_registers+0x144/0x430 Write of size 8 at addr ffffff80c89432b0 by task A618-worker/209 CPU: 5 PID: 209 Comm: A618-worker Tainted: G W 5.4.156-lockdep #22 Hardware name: Google Lazor Limozeen without Touchscreen (rev5 - rev8) (DT) Call trace: dump_backtrace+0x0/0x248 show_stack+0x20/0x2c dump_stack+0x128/0x1ec print_address_description+0x88/0x4a0 __kasan_report+0xfc/0x120 kasan_report+0x10/0x18 __asan_report_store8_noabort+0x1c/0x24 _a6xx_get_gmu_registers+0x144/0x430 a6xx_gpu_state_get+0x330/0x25d4 msm_gpu_crashstate_capture+0xa0/0x84c recover_worker+0x328/0x838 kthread_worker_fn+0x32c/0x574 kthread+0x2dc/0x39c ret_from_fork+0x10/0x18 Allocated by task 209: __kasan_kmalloc+0xfc/0x1c4 kasan_kmalloc+0xc/0x14 kmem_cache_alloc_trace+0x1f0/0x2a0 a6xx_gpu_state_get+0x164/0x25d4 msm_gpu_crashstate_capture+0xa0/0x84c recover_worker+0x328/0x838 kthread_worker_fn+0x32c/0x574 kthread+0x2dc/0x39c ret_from_fork+0x10/0x18 Fixes: 142639a52a01 ("drm/msm/a6xx: fix crashstate capture for A650") Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20211103153049.1.Idfa574ccb529d17b69db3a1852e49b580132035c@changeid Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 7501849ed15d..6e90209cd543 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -777,12 +777,12 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu, struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); a6xx_state->gmu_registers = state_kcalloc(a6xx_state, - 2, sizeof(*a6xx_state->gmu_registers)); + 3, sizeof(*a6xx_state->gmu_registers)); if (!a6xx_state->gmu_registers) return; - a6xx_state->nr_gmu_registers = 2; + a6xx_state->nr_gmu_registers = 3; /* Get the CX GMU registers from AHB */ _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], From 59ba1b2b4825342676300f66d785764be3fcb093 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 5 Nov 2021 13:20:21 -0700 Subject: [PATCH 006/253] drm/msm/devfreq: Fix OPP refcnt leak Reported-by: Douglas Anderson Fixes: 9bc95570175a ("drm/msm: Devfreq tuning") Signed-off-by: Rob Clark Reviewed-by: Douglas Anderson Tested-By: Steev Klimaszewski Reviewed-by: Akhil P Oommen Link: https://lore.kernel.org/r/20211105202021.181092-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu_devfreq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 8b7473f69cb8..3b129161c140 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -20,6 +20,10 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq, struct msm_gpu *gpu = dev_to_gpu(dev); struct dev_pm_opp *opp; + /* + * Note that devfreq_recommended_opp() can modify the freq + * to something that actually is in the opp table: + */ opp = devfreq_recommended_opp(dev, freq, flags); /* @@ -28,6 +32,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq, */ if (gpu->devfreq.idle_freq) { gpu->devfreq.idle_freq = *freq; + dev_pm_opp_put(opp); return 0; } From 3466d9e217b337bf473ee629c608e53f9f3ab786 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 10 Nov 2021 11:33:42 -0800 Subject: [PATCH 007/253] drm/msm: Fix mmap to include VM_IO and VM_DONTDUMP In commit 510410bfc034 ("drm/msm: Implement mmap as GEM object function") we switched to a new/cleaner method of doing things. That's good, but we missed a little bit. Before that commit, we used to _first_ run through the drm_gem_mmap_obj() case where `obj->funcs->mmap()` was NULL. That meant that we ran: vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); ...and _then_ we modified those mappings with our own. Now that `obj->funcs->mmap()` is no longer NULL we don't run the default code. It looks like the fact that the vm_flags got VM_IO / VM_DONTDUMP was important because we're now getting crashes on Chromebooks that use ARC++ while logging out. Specifically a crash that looks like this (this is on a 5.10 kernel w/ relevant backports but also seen on a 5.15 kernel): Unable to handle kernel paging request at virtual address ffffffc008000000 Mem abort info: ESR = 0x96000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000006 CM = 0, WnR = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=000000008293d000 [ffffffc008000000] pgd=00000001002b3003, p4d=00000001002b3003, pud=00000001002b3003, pmd=0000000000000000 Internal error: Oops: 96000006 [#1] PREEMPT SMP [...] CPU: 7 PID: 15734 Comm: crash_dump64 Tainted: G W 5.10.67 #1 [...] Hardware name: Qualcomm Technologies, Inc. sc7280 IDP SKU2 platform (DT) pstate: 80400009 (Nzcv daif +PAN -UAO -TCO BTYPE=--) pc : __arch_copy_to_user+0xc0/0x30c lr : copyout+0xac/0x14c [...] Call trace: __arch_copy_to_user+0xc0/0x30c copy_page_to_iter+0x1a0/0x294 process_vm_rw_core+0x240/0x408 process_vm_rw+0x110/0x16c __arm64_sys_process_vm_readv+0x30/0x3c el0_svc_common+0xf8/0x250 do_el0_svc+0x30/0x80 el0_svc+0x10/0x1c el0_sync_handler+0x78/0x108 el0_sync+0x184/0x1c0 Code: f8408423 f80008c3 910020c6 36100082 (b8404423) Let's add the two flags back in. While we're at it, the fact that we aren't running the default means that we _don't_ need to clear out VM_PFNMAP, so remove that and save an instruction. NOTE: it was confirmed that VM_IO was the important flag to fix the problem I was seeing, but adding back VM_DONTDUMP seems like a sane thing to do so I'm doing that too. Fixes: 510410bfc034 ("drm/msm: Implement mmap as GEM object function") Reported-by: Stephen Boyd Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Tested-by: Stephen Boyd Link: https://lore.kernel.org/r/20211110113334.1.I1687e716adb2df746da58b508db3f25423c40b27@changeid Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 104fdfc14027..3f7f350c13b7 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1056,8 +1056,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct { struct msm_gem_object *msm_obj = to_msm_bo(obj); - vma->vm_flags &= ~VM_PFNMAP; - vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = msm_gem_pgprot(msm_obj, vm_get_page_prot(vma->vm_flags)); return 0; From ea0006d390a28012f8187717aea61498b2b341e5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 11 Nov 2021 11:24:55 -0800 Subject: [PATCH 008/253] drm/msm: Fix wait_fence submitqueue leak We weren't dropping the submitqueue reference in all paths. In particular, when the fence has already been signalled. Split out a helper to simplify handling this in the various different return paths. Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr") Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20211111192457.747899-2-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 49 +++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 7936e8d498dd..b8ec009e088f 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -967,29 +967,12 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, return ret; } -static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, - struct drm_file *file) +static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id, + ktime_t timeout) { - struct msm_drm_private *priv = dev->dev_private; - struct drm_msm_wait_fence *args = data; - ktime_t timeout = to_ktime(args->timeout); - struct msm_gpu_submitqueue *queue; - struct msm_gpu *gpu = priv->gpu; struct dma_fence *fence; int ret; - if (args->pad) { - DRM_ERROR("invalid pad: %08x\n", args->pad); - return -EINVAL; - } - - if (!gpu) - return 0; - - queue = msm_submitqueue_get(file->driver_priv, args->queueid); - if (!queue) - return -ENOENT; - /* * Map submitqueue scoped "seqno" (which is actually an idr key) * back to underlying dma-fence @@ -1001,7 +984,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, ret = mutex_lock_interruptible(&queue->lock); if (ret) return ret; - fence = idr_find(&queue->fence_idr, args->fence); + fence = idr_find(&queue->fence_idr, fence_id); if (fence) fence = dma_fence_get_rcu(fence); mutex_unlock(&queue->lock); @@ -1017,6 +1000,32 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, } dma_fence_put(fence); + + return ret; +} + +static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct msm_drm_private *priv = dev->dev_private; + struct drm_msm_wait_fence *args = data; + struct msm_gpu_submitqueue *queue; + int ret; + + if (args->pad) { + DRM_ERROR("invalid pad: %08x\n", args->pad); + return -EINVAL; + } + + if (!priv->gpu) + return 0; + + queue = msm_submitqueue_get(file->driver_priv, args->queueid); + if (!queue) + return -ENOENT; + + ret = wait_fence(queue, args->fence, to_ktime(args->timeout)); + msm_submitqueue_put(queue); return ret; From 067ecab9eef620d41040715669e5fcdc2f8ff963 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 11 Nov 2021 11:24:56 -0800 Subject: [PATCH 009/253] drm/msm: Restore error return on invalid fence When converting to use an idr to map userspace fence seqno values back to a dma_fence, we lost the error return when userspace passes seqno that is larger than the last submitted fence. Restore this check. Reported-by: Akhil P Oommen Fixes: a61acbbe9cf8 ("drm/msm: Track "seqno" fences by idr") Signed-off-by: Rob Clark Reviewed-by: Akhil P Oommen Link: https://lore.kernel.org/r/20211111192457.747899-3-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 6 ++++++ drivers/gpu/drm/msm/msm_gem_submit.c | 1 + drivers/gpu/drm/msm/msm_gpu.h | 3 +++ 3 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index b8ec009e088f..892c04365239 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -973,6 +973,12 @@ static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id, struct dma_fence *fence; int ret; + if (fence_id > queue->last_fence) { + DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n", + fence_id, queue->last_fence); + return -EINVAL; + } + /* * Map submitqueue scoped "seqno" (which is actually an idr key) * back to underlying dma-fence diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3cb029f10925..aa341e83627b 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -904,6 +904,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, drm_sched_entity_push_job(&submit->base); args->fence = submit->fence_id; + queue->last_fence = submit->fence_id; msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs); msm_process_post_deps(post_deps, args->nr_out_syncobjs, diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 59cdd00b69d0..48ea2de911f1 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -359,6 +359,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio, * @ring_nr: the ringbuffer used by this submitqueue, which is determined * by the submitqueue's priority * @faults: the number of GPU hangs associated with this submitqueue + * @last_fence: the sequence number of the last allocated fence (for error + * checking) * @ctx: the per-drm_file context associated with the submitqueue (ie. * which set of pgtables do submits jobs associated with the * submitqueue use) @@ -374,6 +376,7 @@ struct msm_gpu_submitqueue { u32 flags; u32 ring_nr; int faults; + uint32_t last_fence; struct msm_file_private *ctx; struct list_head node; struct idr fence_idr; From 4823c03049250904de3c446fd286d21c83babffd Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 11 Nov 2021 15:01:49 -0800 Subject: [PATCH 010/253] drm/msm: Make a6xx_gpu_set_freq() static Reported-by: kernel test robot Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20211111230151.765228-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 267a880811d6..9e47714d700b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1640,7 +1640,7 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; } -void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) +static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); From 2d1d175a61dff2097072d66fe0c3b51d534245d6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 11 Nov 2021 15:02:14 -0800 Subject: [PATCH 011/253] drm/msm: Demote debug message Mesa attempts to allocate a cached-coherent buffer in order to determine if cached-coherent is supported. Resulting in seeing this error message once per process with newer mesa. But no reason for this to be more than a debug msg. Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20211111230214.765476-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 3f7f350c13b7..512d55eecbaf 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1120,7 +1120,7 @@ static int msm_gem_new_impl(struct drm_device *dev, break; fallthrough; default: - DRM_DEV_ERROR(dev->dev, "invalid cache flag: %x\n", + DRM_DEV_DEBUG(dev->dev, "invalid cache flag: %x\n", (flags & MSM_BO_CACHE_MASK)); return -EINVAL; } From 26d776fd0f79f093a5d0ce1a4c7c7a992bc3264c Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Thu, 18 Nov 2021 15:50:30 +0530 Subject: [PATCH 012/253] drm/msm: Fix null ptr access msm_ioctl_gem_submit() Fix the below null pointer dereference in msm_ioctl_gem_submit(): 26545.260705: Call trace: 26545.263223: kref_put+0x1c/0x60 26545.266452: msm_ioctl_gem_submit+0x254/0x744 26545.270937: drm_ioctl_kernel+0xa8/0x124 26545.274976: drm_ioctl+0x21c/0x33c 26545.278478: drm_compat_ioctl+0xdc/0xf0 26545.282428: __arm64_compat_sys_ioctl+0xc8/0x100 26545.287169: el0_svc_common+0xf8/0x250 26545.291025: do_el0_svc_compat+0x28/0x54 26545.295066: el0_svc_compat+0x10/0x1c 26545.298838: el0_sync_compat_handler+0xa8/0xcc 26545.303403: el0_sync_compat+0x188/0x1c0 26545.307445: Code: d503201f d503201f 52800028 4b0803e8 (b8680008) 26545.318799: Kernel panic - not syncing: Oops: Fatal exception Signed-off-by: Akhil P Oommen Link: https://lore.kernel.org/r/20211118154903.2.I3ae019673a0cc45d83a193a7858748dd03dbb820@changeid Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index aa341e83627b..282628d6b72c 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -772,6 +772,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, args->nr_cmds); if (IS_ERR(submit)) { ret = PTR_ERR(submit); + submit = NULL; goto out_unlock; } From 9ba873e66ed317a1ff645d5e52c2e72597ff3d18 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Thu, 18 Nov 2021 15:50:31 +0530 Subject: [PATCH 013/253] drm/msm/a6xx: Fix uinitialized use of gpu_scid Avoid a possible uninitialized use of gpu_scid variable to fix the below smatch warning: drivers/gpu/drm/msm/adreno/a6xx_gpu.c:1480 a6xx_llc_activate() error: uninitialized symbol 'gpu_scid'. Reported-by: Dan Carpenter Signed-off-by: Akhil P Oommen Link: https://lore.kernel.org/r/20211118154903.3.Ie4ac321feb10168af569d9c2b4cf6828bed8122c@changeid Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 9e47714d700b..78aad5216a61 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1424,17 +1424,24 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct msm_gpu *gpu = &adreno_gpu->base; - u32 gpu_scid, cntl1_regval = 0; + u32 cntl1_regval = 0; if (IS_ERR(a6xx_gpu->llc_mmio)) return; if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { - gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); + u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); gpu_scid &= 0x1f; cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | (gpu_scid << 15) | (gpu_scid << 20); + + /* On A660, the SCID programming for UCHE traffic is done in + * A6XX_GBIF_SCACHE_CNTL0[14:10] + */ + if (adreno_is_a660_family(adreno_gpu)) + gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | + (1 << 8), (gpu_scid << 10) | (1 << 8)); } /* @@ -1471,13 +1478,6 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) } gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); - - /* On A660, the SCID programming for UCHE traffic is done in - * A6XX_GBIF_SCACHE_CNTL0[14:10] - */ - if (adreno_is_a660_family(adreno_gpu)) - gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | - (1 << 8), (gpu_scid << 10) | (1 << 8)); } static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) From 26b6f1c870b81ce8a48751a21f363958d2c04cf2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 20 Nov 2021 12:01:01 -0800 Subject: [PATCH 014/253] drm/msm/gpu: Fix idle_work time This was supposed to be a relative timer, not absolute. Fixes: 658f4c829688 ("drm/msm/devfreq: Add 1ms delay before clamping freq") Signed-off-by: Rob Clark Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20211120200103.1051459-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu_devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 3b129161c140..4beb42c31900 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -233,5 +233,5 @@ void msm_devfreq_idle(struct msm_gpu *gpu) struct msm_gpu_devfreq *df = &gpu->devfreq; msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1), - HRTIMER_MODE_ABS); + HRTIMER_MODE_REL); } From 5dbe2711e41818b5474d2119d075d2facc8533dc Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 20 Nov 2021 12:01:02 -0800 Subject: [PATCH 015/253] drm/msm/gpu: Fix check for devices without devfreq Looks like 658f4c829688 ("drm/msm/devfreq: Add 1ms delay before clamping freq") was badly rebased on top of efb8a170a367 ("drm/msm: Fix devfreq NULL pointer dereference on a3xx") and ended up with the NULL check in the wrong place. Fixes: 658f4c829688 ("drm/msm/devfreq: Add 1ms delay before clamping freq") Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20211120200103.1051459-2-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu_devfreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 4beb42c31900..384e90c4b2a7 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -208,9 +208,6 @@ static void msm_devfreq_idle_work(struct kthread_work *work) struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq); unsigned long idle_freq, target_freq = 0; - if (!df->devfreq) - return; - /* * Hold devfreq lock to synchronize with get_dev_status()/ * target() callbacks @@ -232,6 +229,9 @@ void msm_devfreq_idle(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; + if (!df->devfreq) + return; + msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1), HRTIMER_MODE_REL); } From cd92cc187c053ab010a1570e2d61d68394a5c725 Mon Sep 17 00:00:00 2001 From: Philip Chen Date: Sat, 30 Oct 2021 10:08:50 -0700 Subject: [PATCH 016/253] drm/msm/dsi: set default num_data_lanes If "data_lanes" property of the dsi output endpoint is missing in the DT, num_data_lanes would be 0 by default, which could cause dsi_host_attach() to fail if dsi->lanes is set to a non-zero value by the bridge driver. According to the binding document of msm dsi controller, the input/output endpoint of the controller is expected to have 4 lanes. So let's set num_data_lanes to 4 by default. Signed-off-by: Philip Chen Reviewed-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20211030100812.1.I6cd9af36b723fed277d34539d3b2ba4ca233ad2d@changeid Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index f69a125f9559..0afc3b756f92 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1658,6 +1658,8 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host, if (!prop) { DRM_DEV_DEBUG(dev, "failed to find data lane mapping, using default\n"); + /* Set the number of date lanes to 4 by default. */ + msm_host->num_data_lanes = 4; return 0; } From d03fcc1de0863b1188ceb867cfa84a578fdc96bc Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 9 Nov 2021 10:04:18 -0800 Subject: [PATCH 017/253] drm/msm/dp: Avoid unpowered AUX xfers that caused crashes If you happened to try to access `/dev/drm_dp_aux` devices provided by the MSM DP AUX driver too early at bootup you could go boom. Let's avoid that by only allowing AUX transfers when the controller is powered up. Specifically the crash that was seen (on Chrome OS 5.4 tree with relevant backports): Kernel panic - not syncing: Asynchronous SError Interrupt CPU: 0 PID: 3131 Comm: fwupd Not tainted 5.4.144-16620-g28af11b73efb #1 Hardware name: Google Lazor (rev3+) with KB Backlight (DT) Call trace: dump_backtrace+0x0/0x14c show_stack+0x20/0x2c dump_stack+0xac/0x124 panic+0x150/0x390 nmi_panic+0x80/0x94 arm64_serror_panic+0x78/0x84 do_serror+0x0/0x118 do_serror+0xa4/0x118 el1_error+0xbc/0x160 dp_catalog_aux_write_data+0x1c/0x3c dp_aux_cmd_fifo_tx+0xf0/0x1b0 dp_aux_transfer+0x1b0/0x2bc drm_dp_dpcd_access+0x8c/0x11c drm_dp_dpcd_read+0x64/0x10c auxdev_read_iter+0xd4/0x1c4 I did a little bit of tracing and found that: * We register the AUX device very early at bootup. * Power isn't actually turned on for my system until hpd_event_thread() -> dp_display_host_init() -> dp_power_init() * You can see that dp_power_init() calls dp_aux_init() which is where we start allowing AUX channel requests to go through. In general this patch is a bit of a bandaid but at least it gets us out of the current state where userspace acting at the wrong time can fully crash the system. * I think the more proper fix (which requires quite a bit more changes) is to power stuff on while an AUX transfer is happening. This is like the solution we did for ti-sn65dsi86. This might be required for us to move to populating the panel via the DP-AUX bus. * Another fix considered was to dynamically register / unregister. I tried that at but it got ugly. Currently there's a bug where the pm_runtime() state isn't tracked properly and that causes us to just keep registering more and more. Signed-off-by: Douglas Anderson Reviewed-by: Kuogee Hsieh Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/20211109100403.1.I4e23470d681f7efe37e2e7f1a6466e15e9bb1d72@changeid Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_aux.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index eb40d8413bca..6d36f63c3338 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -33,6 +33,7 @@ struct dp_aux_private { bool read; bool no_send_addr; bool no_send_stop; + bool initted; u32 offset; u32 segment; @@ -331,6 +332,10 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux, } mutex_lock(&aux->mutex); + if (!aux->initted) { + ret = -EIO; + goto exit; + } dp_aux_update_offset_and_segment(aux, msg); dp_aux_transfer_helper(aux, msg, true); @@ -380,6 +385,8 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux, } aux->cmd_busy = false; + +exit: mutex_unlock(&aux->mutex); return ret; @@ -431,8 +438,13 @@ void dp_aux_init(struct drm_dp_aux *dp_aux) aux = container_of(dp_aux, struct dp_aux_private, dp_aux); + mutex_lock(&aux->mutex); + dp_catalog_aux_enable(aux->catalog, true); aux->retry_cnt = 0; + aux->initted = true; + + mutex_unlock(&aux->mutex); } void dp_aux_deinit(struct drm_dp_aux *dp_aux) @@ -441,7 +453,12 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux) aux = container_of(dp_aux, struct dp_aux_private, dp_aux); + mutex_lock(&aux->mutex); + + aux->initted = false; dp_catalog_aux_enable(aux->catalog, false); + + mutex_unlock(&aux->mutex); } int dp_aux_register(struct drm_dp_aux *dp_aux) From e4840d537c2c6b1189d4de16ee0f4820e069dcea Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 8 Nov 2021 10:01:22 -0800 Subject: [PATCH 018/253] drm/msm: Do hw_init() before capturing GPU state In particular, we need to ensure all the necessary blocks are switched to 64b mode (a5xx+) otherwise the high bits of the address of the BO to snapshot state into will be ignored, resulting in: *** gpu fault: ttbr0=0000000000000000 iova=0000000000012000 dir=READ type=TRANSLATION source=CP (0,0,0,0) platform 506a000.gmu: [drm:a6xx_gmu_set_oob] *ERROR* Timeout waiting for GMU OOB set BOOT_SLUMBER: 0x0 Fixes: 4f776f4511c7 ("drm/msm/gpu: Convert the GPU show function to use the GPU state") Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20211108180122.487859-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 09d2d279c30a..dee13fedee3b 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -77,6 +77,7 @@ static int msm_gpu_open(struct inode *inode, struct file *file) goto free_priv; pm_runtime_get_sync(&gpu->pdev->dev); + msm_gpu_hw_init(gpu); show_priv->state = gpu->funcs->gpu_state_get(gpu); pm_runtime_put_sync(&gpu->pdev->dev); From 0956ba63bd94355bf38cd40f7eb9104577739ab8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 23 Nov 2021 08:56:46 -0800 Subject: [PATCH 019/253] scsi: lpfc: Fix non-recovery of remote ports following an unsolicited LOGO A commit introduced formal regstration of all Fabric nodes to the SCSI transport as well as REG/UNREG RPI mailbox requests. The commit introduced the NLP_RELEASE_RPI flag for rports set in the lpfc_cmpl_els_logo_acc() routine to help clean up the RPIs. This new code caused the driver to release the RPI value used for the remote port and marked the RPI invalid. When the driver later attempted to re-login, it would use the invalid RPI and the adapter rejected the PLOGI request. As no login occurred, the devloss timer on the rport expired and connectivity was lost. This patch corrects the code by removing the snippet that requests the rpi to be unregistered. This change only occurs on a node that is already marked to be rediscovered. This puts the code back to its original behavior, preserving the already-assigned rpi value (registered or not) which can be used on the re-login attempts. Link: https://lore.kernel.org/r/20211123165646.62740-1-jsmart2021@gmail.com Fixes: fe83e3b9b422 ("scsi: lpfc: Fix node handling for Fabric Controller and Domain Controller") Cc: # v5.14+ Co-developed-by: Paul Ely Signed-off-by: Paul Ely Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index b940e0268f96..e83453bea2ae 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5095,14 +5095,9 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* NPort Recovery mode or node is just allocated */ if (!lpfc_nlp_not_used(ndlp)) { /* A LOGO is completing and the node is in NPR state. - * If this a fabric node that cleared its transport - * registration, release the rpi. + * Just unregister the RPI because the node is still + * required. */ - spin_lock_irq(&ndlp->lock); - ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; - if (phba->sli_rev == LPFC_SLI_REV4) - ndlp->nlp_flag |= NLP_RELEASE_RPI; - spin_unlock_irq(&ndlp->lock); lpfc_unreg_rpi(vport, ndlp); } else { /* Indicate the node has already released, should From 53ae7230918154d1f4281d7aa3aae9650436eadf Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Wed, 17 Nov 2021 19:48:21 +0200 Subject: [PATCH 020/253] s390/test_unwind: use raw opcode instead of invalid instruction Building with clang & LLVM_IAS=1 leads to an error: arch/s390/lib/test_unwind.c:179:4: error: invalid register pair " mvcl %%r1,%%r1\n" ^ The test creates an invalid instruction that would trap at runtime, but the LLVM inline assembler tries to validate it at compile time too. Use the raw instruction opcode instead. Reported-by: Nick Desaulniers Signed-off-by: Ilie Halip Reviewed-by: Nick Desaulniers Suggested-by: Ulrich Weigand Link: https://github.com/ClangBuiltLinux/linux/issues/1421 Link: https://lore.kernel.org/r/20211117174822.3632412-1-ilie.halip@gmail.com Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger [hca@linux.ibm.com: use illegal opcode, and update comment] Signed-off-by: Heiko Carstens --- arch/s390/lib/test_unwind.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index cfc5f5557c06..bc7973359ae2 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -173,10 +173,11 @@ static noinline int unwindme_func4(struct unwindme *u) } /* - * trigger specification exception + * Trigger operation exception; use insn notation to bypass + * llvm's integrated assembler sanity checks. */ asm volatile( - " mvcl %%r1,%%r1\n" + " .insn e,0x0000\n" /* illegal opcode */ "0: nopr %%r7\n" EX_TABLE(0b, 0b) :); From b68f8a13e3b4bd2f956250cd428fee344f4d60a3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Nov 2021 22:04:19 +0100 Subject: [PATCH 021/253] platform/x86: thinkpad_acpi: Restore missing hotkey_tablet_mode and hotkey_radio_sw sysfs-attr Commit c99ca78d67a6 ("platform/x86: thinkpad_acpi: Switch to common use of attributes") removed the conditional adding of the hotkey_tablet_mode and hotkey_radio_sw sysfs-attributes, replacing this with a hotkey_attr_is_visible() callback which hides them when the feature is not present. But this commit forgot to add these 2 attributes to the default hotkey_attributes[] set, so they would now never get added at all. Add the 2 attributes to the default hotkey_attributes[] set so that they are available on systems with these features once more. Fixes: c99ca78d67a6 ("platform/x86: thinkpad_acpi: Switch to common use of attributes") Cc: Len Baker Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211123210424.266607-2-hdegoede@redhat.com --- drivers/platform/x86/thinkpad_acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b3ac9c3f3b7c..17d581e21e7f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3015,6 +3015,8 @@ static struct attribute *hotkey_attributes[] = { &dev_attr_hotkey_all_mask.attr, &dev_attr_hotkey_adaptive_all_mask.attr, &dev_attr_hotkey_recommended_mask.attr, + &dev_attr_hotkey_tablet_mode.attr, + &dev_attr_hotkey_radio_sw.attr, #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL &dev_attr_hotkey_source_mask.attr, &dev_attr_hotkey_poll_freq.attr, From be892e95361fb72365a4f81475a34c5d43e36708 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Nov 2021 22:05:24 +0100 Subject: [PATCH 022/253] platform/x86: thinkpad_acpi: Add lid_logo_dot to the list of safe LEDs There have been various bugs / forum threads about allowing control of the LED in the ThinkPad logo on the lid of various models. This seems to be something which users want to control and there really is no reason to require setting CONFIG_THINKPAD_ACPI_UNSAFE_LEDS for this. The lid-logo-dot is LED number 10, so change the name of the 10th led from unknown_led2 to lid_logo_dot and add it to the TPACPI_SAFE_LEDS mask. Link: https://www.reddit.com/r/thinkpad/comments/7n8eyu/thinkpad_led_control_under_gnulinux/ BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1943318 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211123210524.266705-2-hdegoede@redhat.com --- drivers/platform/x86/thinkpad_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 17d581e21e7f..bb1abb947e1e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -5728,11 +5728,11 @@ static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = { "tpacpi::standby", "tpacpi::dock_status1", "tpacpi::dock_status2", - "tpacpi::unknown_led2", + "tpacpi::lid_logo_dot", "tpacpi::unknown_led3", "tpacpi::thinkvantage", }; -#define TPACPI_SAFE_LEDS 0x1081U +#define TPACPI_SAFE_LEDS 0x1481U static inline bool tpacpi_is_led_restricted(const unsigned int led) { From 48d5e836ebc09cb766ab5e11945fa7e89120a3f6 Mon Sep 17 00:00:00 2001 From: Matan Ziv-Av Date: Tue, 23 Nov 2021 22:14:55 +0200 Subject: [PATCH 023/253] platform/x86: lg-laptop: Recognize more models LG uses 5 instead of 0 in the third digit (second digit after 2019) of the year string to indicate newer models in the same year. Handle this case as well. Signed-off-by: Matan Ziv-Av Link: https://lore.kernel.org/r/c752b3b2-9718-bd9a-732d-e165aa8a1fca@svgalib.org Signed-off-by: Hans de Goede --- drivers/platform/x86/lg-laptop.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index ae9293024c77..a91847a551a7 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -657,6 +657,18 @@ static int acpi_add(struct acpi_device *device) if (product && strlen(product) > 4) switch (product[4]) { case '5': + if (strlen(product) > 5) + switch (product[5]) { + case 'N': + year = 2021; + break; + case '0': + year = 2016; + break; + default: + year = 2022; + } + break; case '6': year = 2016; break; From bbb9429a210ee79c2e4a0d1b6c41818975585ca9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 24 Nov 2021 18:51:25 +0100 Subject: [PATCH 024/253] platform/x86: touchscreen_dmi: Add TrekStor SurfTab duo W1 touchscreen info The TrekStor SurfTab duo W1 (ST10432-10b) has a Goodix touchscreen which has its x-axis mirrored. Add a quirk to fix this. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211124175125.250329-1-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index fa8812039b82..17dd54d4b783 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -905,6 +905,16 @@ static const struct ts_dmi_data trekstor_primetab_t13b_data = { .properties = trekstor_primetab_t13b_props, }; +static const struct property_entry trekstor_surftab_duo_w1_props[] = { + PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"), + { } +}; + +static const struct ts_dmi_data trekstor_surftab_duo_w1_data = { + .acpi_name = "GDIX1001:00", + .properties = trekstor_surftab_duo_w1_props, +}; + static const struct property_entry trekstor_surftab_twin_10_1_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 20), PROPERTY_ENTRY_U32("touchscreen-min-y", 0), @@ -1502,6 +1512,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Primetab T13B"), }, }, + { + /* TrekStor SurfTab duo W1 10.1 ST10432-10b */ + .driver_data = (void *)&trekstor_surftab_duo_w1_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TrekStor"), + DMI_MATCH(DMI_PRODUCT_NAME, "SurfTab duo W1 10.1 (VT4)"), + }, + }, { /* TrekStor SurfTab twin 10.1 ST10432-8 */ .driver_data = (void *)&trekstor_surftab_twin_10_1_data, From b3483994b33a18a284aa453e21682a03f3b61206 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Tue, 2 Nov 2021 13:38:17 +0100 Subject: [PATCH 025/253] MAINTAINERS: Add rpmsg tty driver maintainer Adding myself as rpmsg tty maintainer and also adding remoteproc mailing list to inform about changes in the driver. Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20211102123817.19874-1-arnaud.pouliquen@foss.st.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5250298d2817..4af3c8eb5b47 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16490,6 +16490,12 @@ T: git git://linuxtv.org/media_tree.git F: Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml F: drivers/media/platform/sunxi/sun8i-rotate/ +RPMSG TTY DRIVER +M: Arnaud Pouliquen +L: linux-remoteproc@vger.kernel.org +S: Maintained +F: drivers/tty/rpmsg_tty.c + RTL2830 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org From 00de977f9e0aa9760d9a79d1e41ff780f74e3424 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Nov 2021 09:54:31 +0100 Subject: [PATCH 026/253] serial: core: fix transmit-buffer reset and memleak Commit 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close") converted serial core to use tty_port_close() but failed to notice that the transmit buffer still needs to be freed on final close. Not freeing the transmit buffer means that the buffer is no longer cleared on next open so that any ioctl() waiting for the buffer to drain might wait indefinitely (e.g. on termios changes) or that stale data can end up being transmitted in case tx is restarted. Furthermore, the buffer of any port that has been opened would leak on driver unbind. Note that the port lock is held when clearing the buffer pointer due to the ldisc race worked around by commit a5ba1d95e46e ("uart: fix race between uart_put_char() and uart_shutdown()"). Also note that the tty-port shutdown() callback is not called for console ports so it is not strictly necessary to free the buffer page after releasing the lock (cf. d72402145ace ("tty/serial: do not free trasnmit buffer page under port lock")). Link: https://lore.kernel.org/r/319321886d97c456203d5c6a576a5480d07c3478.1635781688.git.baruch@tkos.co.il Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close") Cc: stable@vger.kernel.org # 4.9 Cc: Rob Herring Reported-by: Baruch Siach Tested-by: Baruch Siach Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211108085431.12637-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 1e738f265eea..8968d15d7804 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1549,6 +1549,7 @@ static void uart_tty_port_shutdown(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = uart_port_check(state); + char *buf; /* * At this point, we stop accepting input. To do this, we @@ -1570,8 +1571,18 @@ static void uart_tty_port_shutdown(struct tty_port *port) */ tty_port_set_suspended(port, 0); - uart_change_pm(state, UART_PM_STATE_OFF); + /* + * Free the transmit buffer. + */ + spin_lock_irq(&uport->lock); + buf = state->xmit.buf; + state->xmit.buf = NULL; + spin_unlock_irq(&uport->lock); + if (buf) + free_page((unsigned long)buf); + + uart_change_pm(state, UART_PM_STATE_OFF); } static void uart_wait_until_sent(struct tty_struct *tty, int timeout) From ac442a077acf9a6bf1db4320ec0c3f303be092b3 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Tue, 9 Nov 2021 17:22:48 +0000 Subject: [PATCH 027/253] serial: pl011: Add ACPI SBSA UART match id The document 'ACPI for Arm Components 1.0' defines the following _HID mappings: -'Prime cell UART (PL011)': ARMH0011 -'SBSA UART': ARMHB000 Use the sbsa-uart driver when a device is described with the 'ARMHB000' _HID. Note: PL011 devices currently use the sbsa-uart driver instead of the uart-pl011 driver. Indeed, PL011 devices are not bound to a clock in ACPI. It is not possible to change their baudrate. Cc: Signed-off-by: Pierre Gondois Link: https://lore.kernel.org/r/20211109172248.19061-1-Pierre.Gondois@arm.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index d361cd84ff8c..52518a606c06 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2947,6 +2947,7 @@ MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); static const struct acpi_device_id __maybe_unused sbsa_uart_acpi_match[] = { { "ARMH0011", 0 }, + { "ARMHB000", 0 }, {}, }; MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match); From 7492ffc90fa126afb67d4392d56cb4134780194a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 13 Nov 2021 13:10:50 +0100 Subject: [PATCH 028/253] tty: serial: msm_serial: Deactivate RX DMA for polling support The CONSOLE_POLLING mode is used for tools like k(g)db. In this kind of setup, it is often sharing a serial device with the normal system console. This is usually no problem because the polling helpers can consume input values directly (when in kgdb context) and the normal Linux handlers can only consume new input values after kgdb switched back. This is not true anymore when RX DMA is enabled for UARTDM controllers. Single input values can no longer be received correctly. Instead following seems to happen: * on 1. input, some old input is read (continuously) * on 2. input, two old inputs are read (continuously) * on 3. input, three old input values are read (continuously) * on 4. input, 4 previous inputs are received This repeats then for each group of 4 input values. This behavior changes slightly depending on what state the controller was when the first input was received. But this makes working with kgdb basically impossible because control messages are always corrupted when kgdboc tries to parse them. RX DMA should therefore be off when CONSOLE_POLLING is enabled to avoid these kind of problems. No such problem was noticed for TX DMA. Fixes: 99693945013a ("tty: serial: msm: Add RX DMA support") Cc: stable@vger.kernel.org Signed-off-by: Sven Eckelmann Link: https://lore.kernel.org/r/20211113121050.7266-1-sven@narfation.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/msm_serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index fcef7a961430..489d19274f9a 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -598,6 +598,9 @@ static void msm_start_rx_dma(struct msm_port *msm_port) u32 val; int ret; + if (IS_ENABLED(CONFIG_CONSOLE_POLL)) + return; + if (!dma->chan) return; From 3dfac26e2ef29ff2abc2a75aa4cd48fce25a2c4b Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 26 Oct 2021 00:26:22 +0200 Subject: [PATCH 029/253] vgacon: Propagate console boot parameters before calling `vc_resize' Fix a division by zero in `vgacon_resize' with a backtrace like: vgacon_resize vc_do_resize vgacon_init do_bind_con_driver do_unbind_con_driver fbcon_fb_unbind do_unregister_framebuffer do_register_framebuffer register_framebuffer __drm_fb_helper_initial_config_and_unlock drm_helper_hpd_irq_event dw_hdmi_irq irq_thread kthread caused by `c->vc_cell_height' not having been initialized. This has only started to trigger with commit 860dafa90259 ("vt: Fix character height handling with VT_RESIZEX"), however the ultimate offender is commit 50ec42edd978 ("[PATCH] Detaching fbcon: fix vgacon to allow retaking of the console"). Said commit has added a call to `vc_resize' whenever `vgacon_init' is called with the `init' argument set to 0, which did not happen before. And the call is made before a key vgacon boot parameter retrieved in `vgacon_startup' has been propagated in `vgacon_init' for `vc_resize' to use to the console structure being worked on. Previously the parameter was `c->vc_font.height' and now it is `c->vc_cell_height'. In this particular scenario the registration of fbcon has failed and vt resorts to vgacon. Now fbcon does have initialized `c->vc_font.height' somehow, unlike `c->vc_cell_height', which is why this code did not crash before, but either way the boot parameters should have been copied to the console structure ahead of the call to `vc_resize' rather than afterwards, so that first the call has a chance to use them and second they do not change the console structure to something possibly different from what was used by `vc_resize'. Move the propagation of the vgacon boot parameters ahead of the call to `vc_resize' then. Adjust the comment accordingly. Fixes: 50ec42edd978 ("[PATCH] Detaching fbcon: fix vgacon to allow retaking of the console") Cc: stable@vger.kernel.org # v2.6.18+ Reported-by: Wim Osterholt Reported-by: Pavel V. Panteleev Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2110252317110.58149@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/vgacon.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index ef9c57ce0906..9a49ea6b5112 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -366,11 +366,17 @@ static void vgacon_init(struct vc_data *c, int init) struct uni_pagedir *p; /* - * We cannot be loaded as a module, therefore init is always 1, - * but vgacon_init can be called more than once, and init will - * not be 1. + * We cannot be loaded as a module, therefore init will be 1 + * if we are the default console, however if we are a fallback + * console, for example if fbcon has failed registration, then + * init will be 0, so we need to make sure our boot parameters + * have been copied to the console structure for vgacon_resize + * ultimately called by vc_resize. Any subsequent calls to + * vgacon_init init will have init set to 0 too. */ c->vc_can_do_color = vga_can_do_color; + c->vc_scan_lines = vga_scan_lines; + c->vc_font.height = c->vc_cell_height = vga_video_font_height; /* set dimensions manually if init != 0 since vc_resize() will fail */ if (init) { @@ -379,8 +385,6 @@ static void vgacon_init(struct vc_data *c, int init) } else vc_resize(c, vga_video_num_columns, vga_video_num_lines); - c->vc_scan_lines = vga_scan_lines; - c->vc_font.height = c->vc_cell_height = vga_video_font_height; c->vc_complement_mask = 0x7700; if (vga_512_chars) c->vc_hi_font_mask = 0x0800; From 0f55f89d98c8b3e12b4f55f71c127a173e29557c Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Mon, 15 Nov 2021 22:49:44 +0100 Subject: [PATCH 030/253] serial: liteuart: Fix NULL pointer dereference in ->remove() drvdata has to be set in _probe() - otherwise platform_get_drvdata() causes null pointer dereference BUG in _remove(). Fixes: 1da81e5562fa ("drivers/tty/serial: add LiteUART driver") Cc: stable Reviewed-by: Johan Hovold Signed-off-by: Ilia Sergachev Link: https://lore.kernel.org/r/20211115224944.23f8c12b@dtkw Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/liteuart.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c index dbc0559a9157..f075f4ff5fcf 100644 --- a/drivers/tty/serial/liteuart.c +++ b/drivers/tty/serial/liteuart.c @@ -285,6 +285,8 @@ static int liteuart_probe(struct platform_device *pdev) port->line = dev_id; spin_lock_init(&port->lock); + platform_set_drvdata(pdev, port); + return uart_add_one_port(&liteuart_driver, &uart->port); } From 05f929b395dec8957b636ff14e66b277ed022ed9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 17 Nov 2021 11:05:10 +0100 Subject: [PATCH 031/253] serial: liteuart: fix use-after-free and memleak on unbind Deregister the port when unbinding the driver to prevent it from being used after releasing the driver data and leaking memory allocated by serial core. Fixes: 1da81e5562fa ("drivers/tty/serial: add LiteUART driver") Cc: stable@vger.kernel.org # 5.11 Cc: Filip Kokosinski Cc: Mateusz Holenko Reviewed-by: Stafford Horne Reviewed-by: Andy Shevchenko Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211117100512.5058-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/liteuart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c index f075f4ff5fcf..da792d0df790 100644 --- a/drivers/tty/serial/liteuart.c +++ b/drivers/tty/serial/liteuart.c @@ -295,6 +295,7 @@ static int liteuart_remove(struct platform_device *pdev) struct uart_port *port = platform_get_drvdata(pdev); struct liteuart_port *uart = to_liteuart_port(port); + uart_remove_one_port(&liteuart_driver, port); xa_erase(&liteuart_array, uart->id); return 0; From dd5e90b16cca8a697cbe17b72e2a5f49291cabb2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 17 Nov 2021 11:05:11 +0100 Subject: [PATCH 032/253] serial: liteuart: fix minor-number leak on probe errors Make sure to release the allocated minor number before returning on probe errors. Fixes: 1da81e5562fa ("drivers/tty/serial: add LiteUART driver") Cc: stable@vger.kernel.org # 5.11 Cc: Filip Kokosinski Cc: Mateusz Holenko Reviewed-by: Stafford Horne Reviewed-by: Andy Shevchenko Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211117100512.5058-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/liteuart.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c index da792d0df790..2941659e5274 100644 --- a/drivers/tty/serial/liteuart.c +++ b/drivers/tty/serial/liteuart.c @@ -270,8 +270,10 @@ static int liteuart_probe(struct platform_device *pdev) /* get membase */ port->membase = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); - if (IS_ERR(port->membase)) - return PTR_ERR(port->membase); + if (IS_ERR(port->membase)) { + ret = PTR_ERR(port->membase); + goto err_erase_id; + } /* values not from device tree */ port->dev = &pdev->dev; @@ -287,7 +289,16 @@ static int liteuart_probe(struct platform_device *pdev) platform_set_drvdata(pdev, port); - return uart_add_one_port(&liteuart_driver, &uart->port); + ret = uart_add_one_port(&liteuart_driver, &uart->port); + if (ret) + goto err_erase_id; + + return 0; + +err_erase_id: + xa_erase(&liteuart_array, uart->id); + + return ret; } static int liteuart_remove(struct platform_device *pdev) From 0b993fc1fec7b43a75b875763dc58c5940eea47a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 17 Nov 2021 11:05:12 +0100 Subject: [PATCH 033/253] serial: liteuart: relax compile-test dependencies The LITEX symbol is neither a build or runtime dependency for the liteuart serial driver. LITEX is selected by the "LiteX SoC Controller" driver, which does a probe-time register-access sanity check and panics if the SoC has not been configured correctly. That driver's Kconfig entry asserts that any LiteX driver using the LiteX register accessors should depend on LITEX, but currently only the serial driver complies. Relax this LITEX "dependency" in order to make it easier to compile test the driver. Reviewed-by: Andy Shevchenko Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211117100512.5058-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 6ff94cfcd9db..fc543ac97c13 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1533,7 +1533,7 @@ config SERIAL_LITEUART tristate "LiteUART serial port support" depends on HAS_IOMEM depends on OF || COMPILE_TEST - depends on LITEX + depends on LITEX || COMPILE_TEST select SERIAL_CORE help This driver is for the FPGA-based LiteUART serial controller from LiteX From b40de7469ef135161c80af0e8c462298cc5dac00 Mon Sep 17 00:00:00 2001 From: Patrik John Date: Tue, 23 Nov 2021 14:27:38 +0100 Subject: [PATCH 034/253] serial: tegra: Change lower tolerance baud rate limit for tegra20 and tegra30 The current implementation uses 0 as lower limit for the baud rate tolerance for tegra20 and tegra30 chips which causes isses on UART initialization as soon as baud rate clock is lower than required even when within the standard UART tolerance of +/- 4%. This fix aligns the implementation with the initial commit description of +/- 4% tolerance for tegra chips other than tegra186 and tegra194. Fixes: d781ec21bae6 ("serial: tegra: report clk rate errors") Cc: stable Signed-off-by: Patrik John Link: https://lore.kernel.org/r/sig.19614244f8.20211123132737.88341-1-patrik.john@u-blox.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 45e2e4109acd..b6223fab0687 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -1506,7 +1506,7 @@ static struct tegra_uart_chip_data tegra20_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, - .error_tolerance_low_range = 0, + .error_tolerance_low_range = -4, .error_tolerance_high_range = 4, }; @@ -1517,7 +1517,7 @@ static struct tegra_uart_chip_data tegra30_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, - .error_tolerance_low_range = 0, + .error_tolerance_low_range = -4, .error_tolerance_high_range = 4, }; From 4e9679738a918d8a482ac6a2cb2bb871f094bb84 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 24 Nov 2021 08:31:09 +0100 Subject: [PATCH 035/253] Revert "tty: serial: fsl_lpuart: drop earlycon entry for i.MX8QXP" Revert commit b4b844930f27 ("tty: serial: fsl_lpuart: drop earlycon entry for i.MX8QXP"), because this breaks earlycon support on imx8qm/imx8qxp. While it is true that for earlycon there is no difference between i.MX8QXP and i.MX7ULP (for now at least), there are differences regarding clocks and fixups for wakeup support. For that reason it was deemed unacceptable to add the imx7ulp compatible to device tree in order to get earlycon working again. Reviewed-by: Peng Fan Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20211124073109.805088-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index b1e7190ae483..ac5112def40d 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2625,6 +2625,7 @@ OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1028a-lpuart", ls1028a_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup); +OF_EARLYCON_DECLARE(lpuart32, "fsl,imx8qxp-lpuart", lpuart32_imx_early_console_setup); EARLYCON_DECLARE(lpuart, lpuart_early_console_setup); EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup); From f85e04503f369b3f2be28c83fc48b74e19936ebc Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 22 Nov 2021 16:58:24 +0100 Subject: [PATCH 036/253] serial: 8250: Fix RTS modem control while in rs485 mode Commit f45709df7731 ("serial: 8250: Don't touch RTS modem control while in rs485 mode") sought to prevent user space from interfering with rs485 communication by ignoring a TIOCMSET ioctl() which changes RTS polarity. It did so in serial8250_do_set_mctrl(), which turns out to be too deep in the call stack: When a uart_port is opened, RTS polarity is set by the rs485-aware function uart_port_dtr_rts(). It calls down to serial8250_do_set_mctrl() and that particular RTS polarity change should *not* be ignored. The user-visible result is that on 8250_omap ports which use rs485 with inverse polarity (RTS bit in MCR register is 1 to receive, 0 to send), a newly opened port initially sets up RTS for sending instead of receiving. That's because omap_8250_startup() sets the cached value up->mcr to 0 and omap_8250_restore_regs() subsequently writes it to the MCR register. Due to the commit, serial8250_do_set_mctrl() preserves that incorrect register value: do_sys_openat2 do_filp_open path_openat vfs_open do_dentry_open chrdev_open tty_open uart_open tty_port_open uart_port_activate uart_startup uart_port_startup serial8250_startup omap_8250_startup # up->mcr = 0 uart_change_speed serial8250_set_termios omap_8250_set_termios omap_8250_restore_regs serial8250_out_MCR # up->mcr written tty_port_block_til_ready uart_dtr_rts uart_port_dtr_rts serial8250_set_mctrl omap8250_set_mctrl serial8250_do_set_mctrl # mcr[1] = 1 ignored Fix by intercepting RTS changes from user space in uart_tiocmset() instead. Link: https://lore.kernel.org/linux-serial/20211027111644.1996921-1-baocheng.su@siemens.com/ Fixes: f45709df7731 ("serial: 8250: Don't touch RTS modem control while in rs485 mode") Cc: Chao Zeng Cc: stable@vger.kernel.org # v5.7+ Reported-by: Su Bao Cheng Reported-by: Jan Kiszka Tested-by: Su Bao Cheng Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/21170e622a1aaf842a50b32146008b5374b3dd1d.1637596432.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 7 ------- drivers/tty/serial/serial_core.c | 5 +++++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 5775cbff8f6e..46e2079ad1aa 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2024,13 +2024,6 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl) struct uart_8250_port *up = up_to_u8250p(port); unsigned char mcr; - if (port->rs485.flags & SER_RS485_ENABLED) { - if (serial8250_in_MCR(up) & UART_MCR_RTS) - mctrl |= TIOCM_RTS; - else - mctrl &= ~TIOCM_RTS; - } - mcr = serial8250_TIOCM_to_MCR(mctrl); mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 8968d15d7804..61e3dd0222af 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1075,6 +1075,11 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) goto out; if (!tty_io_error(tty)) { + if (uport->rs485.flags & SER_RS485_ENABLED) { + set &= ~TIOCM_RTS; + clear &= ~TIOCM_RTS; + } + uart_update_mctrl(uport, set, clear); ret = 0; } From c525c5d2437f93520388920baac6d9340c65d239 Mon Sep 17 00:00:00 2001 From: Jay Dolan Date: Mon, 22 Nov 2021 14:06:03 +0200 Subject: [PATCH 037/253] serial: 8250_pci: Fix ACCES entries in pci_serial_quirks array Fix error in table for PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S that caused it and PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4 to be missing their fourth port. Fixes: 78d3820b9bd3 ("serial: 8250_pci: Have ACCES cards that use the four port Pericom PI7C9X7954 chip use the pci_pericom_setup()") Cc: stable Signed-off-by: Jay Dolan Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211122120604.3909-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 5d43de143f33..b793d848aeb6 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2291,12 +2291,19 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .setup = pci_pericom_setup_four_at_eight, }, { - .vendor = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .vendor = PCI_VENDOR_ID_ACCESIO, .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .setup = pci_pericom_setup_four_at_eight, }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup_four_at_eight, + }, { .vendor = PCI_VENDOR_ID_ACCESIO, .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, From bb1201d4b38ec67bd9a871cf86b0cc10f28b15b5 Mon Sep 17 00:00:00 2001 From: Jay Dolan Date: Mon, 22 Nov 2021 14:06:04 +0200 Subject: [PATCH 038/253] serial: 8250_pci: rewrite pericom_do_set_divisor() Have pericom_do_set_divisor() use the uartclk instead of a hard coded value to work with different speed crystals. Tested with 14.7456 and 24 MHz crystals. Have pericom_do_set_divisor() always calculate the divisor rather than call serial8250_do_set_divisor() for rates below baud_base. Do not write registers or call serial8250_do_set_divisor() if valid divisors could not be found. Fixes: 6bf4e42f1d19 ("serial: 8250: Add support for higher baud rates to Pericom chips") Cc: stable Signed-off-by: Jay Dolan Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211122120604.3909-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index b793d848aeb6..60f8fffdfd77 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1324,29 +1324,33 @@ pericom_do_set_divisor(struct uart_port *port, unsigned int baud, { int scr; int lcr; - int actual_baud; - int tolerance; - for (scr = 5 ; scr <= 15 ; scr++) { - actual_baud = 921600 * 16 / scr; - tolerance = actual_baud / 50; + for (scr = 16; scr > 4; scr--) { + unsigned int maxrate = port->uartclk / scr; + unsigned int divisor = max(maxrate / baud, 1U); + int delta = maxrate / divisor - baud; - if ((baud < actual_baud + tolerance) && - (baud > actual_baud - tolerance)) { + if (baud > maxrate + baud / 50) + continue; + if (delta > baud / 50) + divisor++; + + if (divisor > 0xffff) + continue; + + /* Update delta due to possible divisor change */ + delta = maxrate / divisor - baud; + if (abs(delta) < baud / 50) { lcr = serial_port_in(port, UART_LCR); serial_port_out(port, UART_LCR, lcr | 0x80); - - serial_port_out(port, UART_DLL, 1); - serial_port_out(port, UART_DLM, 0); + serial_port_out(port, UART_DLL, divisor & 0xff); + serial_port_out(port, UART_DLM, divisor >> 8 & 0xff); serial_port_out(port, 2, 16 - scr); serial_port_out(port, UART_LCR, lcr); return; - } else if (baud > actual_baud) { - break; } } - serial8250_do_set_divisor(port, baud, quot, quot_frac); } static int pci_pericom_setup(struct serial_private *priv, const struct pciserial_board *board, From afece15a68dc83b438cc4c3a64634e48a5735573 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Oct 2021 16:42:56 +0200 Subject: [PATCH 039/253] drm: msm: fix building without CONFIG_COMMON_CLK When CONFIG_COMMON_CLOCK is disabled, the 8996 specific phy code is left out, which results in a link failure: ld: drivers/gpu/drm/msm/hdmi/hdmi_phy.o:(.rodata+0x3f0): undefined reference to `msm_hdmi_phy_8996_cfg' This was only exposed after it became possible to build test the driver without the clock interfaces. Make COMMON_CLK a hard dependency for compile testing, and simplify it a little based on that. Fixes: b3ed524f84f5 ("drm/msm: allow compile_test on !ARM") Reported-by: Randy Dunlap Suggested-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20211013144308.2248978-1-arnd@kernel.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/Kconfig | 2 +- drivers/gpu/drm/msm/Makefile | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index ae11061727ff..39197b4beea7 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -4,8 +4,8 @@ config DRM_MSM tristate "MSM DRM" depends on DRM depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST + depends on COMMON_CLK depends on IOMMU_SUPPORT - depends on (OF && COMMON_CLK) || COMPILE_TEST depends on QCOM_OCMEM || QCOM_OCMEM=n depends on QCOM_LLCC || QCOM_LLCC=n depends on QCOM_COMMAND_DB || QCOM_COMMAND_DB=n diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 40577f8856d8..093454457545 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -23,8 +23,10 @@ msm-y := \ hdmi/hdmi_i2c.o \ hdmi/hdmi_phy.o \ hdmi/hdmi_phy_8960.o \ + hdmi/hdmi_phy_8996.o \ hdmi/hdmi_phy_8x60.o \ hdmi/hdmi_phy_8x74.o \ + hdmi/hdmi_pll_8960.o \ edp/edp.o \ edp/edp_aux.o \ edp/edp_bridge.o \ @@ -37,6 +39,7 @@ msm-y := \ disp/mdp4/mdp4_dtv_encoder.o \ disp/mdp4/mdp4_lcdc_encoder.o \ disp/mdp4/mdp4_lvds_connector.o \ + disp/mdp4/mdp4_lvds_pll.o \ disp/mdp4/mdp4_irq.o \ disp/mdp4/mdp4_kms.o \ disp/mdp4/mdp4_plane.o \ @@ -116,9 +119,6 @@ msm-$(CONFIG_DRM_MSM_DP)+= dp/dp_aux.o \ dp/dp_audio.o msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o -msm-$(CONFIG_COMMON_CLK) += disp/mdp4/mdp4_lvds_pll.o -msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_pll_8960.o -msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_phy_8996.o msm-$(CONFIG_DRM_MSM_HDMI_HDCP) += hdmi/hdmi_hdcp.o From 52d04d408185b7aa47628d2339c28ec70074e0ae Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 4 Nov 2021 15:04:10 +0100 Subject: [PATCH 040/253] s390/pci: move pseudo-MMIO to prevent MIO overlap When running without MIO support, with pci=nomio or for devices which are not MIO-capable the zPCI subsystem generates pseudo-MMIO addresses to allow access to PCI BARs via MMIO based Linux APIs even though the platform uses function handles and BAR numbers. This is done by stashing an index into our global IOMAP array which contains the function handle in the 16 most significant bits of the addresses returned by ioremap() always setting the most significant bit. On the other hand the MIO addresses assigned by the platform for use, while requiring special instructions, allow PCI access with virtually mapped physical addresses. Now the problem is that these MIO addresses and our own pseudo-MMIO addresses may overlap, while functionally this would not be a problem by itself this overlap is detected by common code as both address types are added as resources in the iomem_resource tree. This leads to the overlapping resource claim of either the MIO capable or non-MIO capable devices with being rejected. Since PCI is tightly coupled to the use of the iomem_resource tree, see for example the code for request_mem_region(), we can't reasonably get rid of the overlap being detected by keeping our pseudo-MMIO addresses out of the iomem_resource tree. Instead let's move the range used by our own pseudo-MMIO addresses by starting at (1UL << 62) and only using addresses below (1UL << 63) thus avoiding the range currently used for MIO addresses. Fixes: c7ff0e918a7c ("s390/pci: deal with devices that have no support for MIO instructions") Cc: stable@vger.kernel.org # 5.3+ Reviewed-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pci_io.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index e4dc64cc9c55..287bb88f7698 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -14,12 +14,13 @@ /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 -#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL +#define ZPCI_IOMAP_ADDR_SHIFT 62 +#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT) #define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1) #define ZPCI_IOMAP_MAX_ENTRIES \ - ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT)) + (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT)) #define ZPCI_IOMAP_ADDR_IDX_MASK \ - (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE) + ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK) struct zpci_iomap_entry { u32 fh; From b12764695c3fcade145890b67f82f8b139174cc7 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 27 Nov 2021 21:42:14 +0200 Subject: [PATCH 041/253] i2c: cbus-gpio: set atomic transfer callback CBUS transfers have always been atomic, but after commit 63b96983a5dd ("i2c: core: introduce callbacks for atomic transfers") we started to see warnings during e.g. poweroff as the atomic callback is not explicitly set. Fix that. Fixes the following WARNING seen during Nokia N810 power down: [ 786.570617] reboot: Power down [ 786.573913] ------------[ cut here ]------------ [ 786.578826] WARNING: CPU: 0 PID: 672 at drivers/i2c/i2c-core.h:40 i2c_smbus_xfer+0x100/0x110 [ 786.587799] No atomic I2C transfer handler for 'i2c-2' Fixes: 63b96983a5dd ("i2c: core: introduce callbacks for atomic transfers") Signed-off-by: Aaro Koskinen Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cbus-gpio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c index 72df563477b1..f8639a4457d2 100644 --- a/drivers/i2c/busses/i2c-cbus-gpio.c +++ b/drivers/i2c/busses/i2c-cbus-gpio.c @@ -195,8 +195,9 @@ static u32 cbus_i2c_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm cbus_i2c_algo = { - .smbus_xfer = cbus_i2c_smbus_xfer, - .functionality = cbus_i2c_func, + .smbus_xfer = cbus_i2c_smbus_xfer, + .smbus_xfer_atomic = cbus_i2c_smbus_xfer, + .functionality = cbus_i2c_func, }; static int cbus_i2c_remove(struct platform_device *pdev) From 9003fbe0f3674b972f56fa7e6bf3ac9dbfc4d0ec Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 28 Nov 2021 22:07:48 +0100 Subject: [PATCH 042/253] HID: quirks: Add quirk for the Microsoft Surface 3 type-cover Add a HID_QUIRK_NO_INIT_REPORTS quirk for the Microsoft Surface 3 (non pro) type-cover. Trying to init the reports seems to confuse the type-cover and causes 2 issues: 1. Despite hid-multitouch sending the command to switch the touchpad to multitouch mode, it keeps sending events on the mouse emulation interface. 2. The touchpad completely stops sending events after a reboot. Adding the HID_QUIRK_NO_INIT_REPORTS quirk fixes both issues. Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 96a455921c67..aeb907b57ab3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -886,6 +886,7 @@ #define USB_DEVICE_ID_MS_TOUCH_COVER_2 0x07a7 #define USB_DEVICE_ID_MS_TYPE_COVER_2 0x07a9 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da +#define USB_DEVICE_ID_MS_SURFACE3_COVER 0x07de #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 06b7908c874c..ee7e504e7279 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -124,6 +124,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, From 7e78781df491e4beb475bac22e6c44236a5002d7 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 22 Nov 2021 15:22:09 -0800 Subject: [PATCH 043/253] drm/virtgpu api: define a dummy fence signaled event The current virtgpu implementation of poll(..) drops events when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is enabled (otherwise it's like a normal DRM driver). This is because paravirtualized userspaces receives responses in a buffer of type BLOB_MEM_GUEST, not by read(..). To be in line with other DRM drivers and avoid specialized behavior, it is possible to define a dummy event for virtgpu. Paravirtualized userspace will now have to call read(..) on the DRM fd to receive the dummy event. Fixes: b10790434cf2 ("drm/virtgpu api: create context init feature") Reported-by: Daniel Vetter Signed-off-by: Gurchetan Singh Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20211122232210.602-2-gurchetansingh@google.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/virtio/virtgpu_drv.h | 1 - drivers/gpu/drm/virtio/virtgpu_ioctl.c | 2 +- include/uapi/drm/virtgpu_drm.h | 7 +++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index e0265fe74aa5..0a194aaad419 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -138,7 +138,6 @@ struct virtio_gpu_fence_driver { spinlock_t lock; }; -#define VIRTGPU_EVENT_FENCE_SIGNALED_INTERNAL 0x10000000 struct virtio_gpu_fence_event { struct drm_pending_event base; struct drm_event event; diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 5618a1d5879c..3607646d3229 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -54,7 +54,7 @@ static int virtio_gpu_fence_event_create(struct drm_device *dev, if (!e) return -ENOMEM; - e->event.type = VIRTGPU_EVENT_FENCE_SIGNALED_INTERNAL; + e->event.type = VIRTGPU_EVENT_FENCE_SIGNALED; e->event.length = sizeof(e->event); ret = drm_event_reserve_init(dev, file, &e->base, &e->event); diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index a13e20cc66b4..0512fde5e697 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -196,6 +196,13 @@ struct drm_virtgpu_context_init { __u64 ctx_set_params; }; +/* + * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in + * effect. The event size is sizeof(drm_event), since there is no additional + * payload. + */ +#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000 + #define DRM_IOCTL_VIRTGPU_MAP \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) From 42abd0043e0c64fa64e99adba534c76b9b15e6b8 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 22 Nov 2021 15:22:10 -0800 Subject: [PATCH 044/253] drm/virtio: use drm_poll(..) instead of virtio_gpu_poll(..) With the use of dummy events, we can drop virtgpu specific behavior. Fixes: cd7f5ca33585 ("drm/virtio: implement context init: add virtio_gpu_fence_event") Reported-by: Daniel Vetter Signed-off-by: Gurchetan Singh Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20211122232210.602-3-gurchetansingh@google.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/virtio/virtgpu_drv.c | 42 +--------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index d86e1ad4a972..5072dbb0669a 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -157,36 +157,6 @@ static void virtio_gpu_config_changed(struct virtio_device *vdev) schedule_work(&vgdev->config_changed_work); } -static __poll_t virtio_gpu_poll(struct file *filp, - struct poll_table_struct *wait) -{ - struct drm_file *drm_file = filp->private_data; - struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv; - struct drm_device *dev = drm_file->minor->dev; - struct virtio_gpu_device *vgdev = dev->dev_private; - struct drm_pending_event *e = NULL; - __poll_t mask = 0; - - if (!vgdev->has_virgl_3d || !vfpriv || !vfpriv->ring_idx_mask) - return drm_poll(filp, wait); - - poll_wait(filp, &drm_file->event_wait, wait); - - if (!list_empty(&drm_file->event_list)) { - spin_lock_irq(&dev->event_lock); - e = list_first_entry(&drm_file->event_list, - struct drm_pending_event, link); - drm_file->event_space += e->event->length; - list_del(&e->link); - spin_unlock_irq(&dev->event_lock); - - kfree(e); - mask |= EPOLLIN | EPOLLRDNORM; - } - - return mask; -} - static struct virtio_device_id id_table[] = { { VIRTIO_ID_GPU, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -226,17 +196,7 @@ MODULE_AUTHOR("Dave Airlie "); MODULE_AUTHOR("Gerd Hoffmann "); MODULE_AUTHOR("Alon Levy"); -static const struct file_operations virtio_gpu_driver_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .poll = virtio_gpu_poll, - .read = drm_read, - .llseek = noop_llseek, - .mmap = drm_gem_mmap -}; +DEFINE_DRM_GEM_FOPS(virtio_gpu_driver_fops); static const struct drm_driver driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC, From 5fad50779083102e0c01919acf1c13d6012b9e38 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 26 Nov 2021 09:34:19 +1100 Subject: [PATCH 045/253] pata_falcon: Avoid type warnings from sparse The zero day bot reported some sparse complaints in pata_falcon.c. E.g. drivers/ata/pata_falcon.c:58:41: warning: cast removes address space '__iomem' of expression drivers/ata/pata_falcon.c:58:41: warning: incorrect type in argument 1 (different address spaces) drivers/ata/pata_falcon.c:58:41: expected unsigned short volatile [noderef] [usertype] __iomem *port drivers/ata/pata_falcon.c:58:41: got unsigned short [usertype] * The same thing shows up in 8 places, all told. Avoid this by removing unnecessary type casts. Cc: Jens Axboe Cc: Michael Schmitz Cc: Geert Uytterhoeven Reported-by: kernel test robot Suggested-by: Geert Uytterhoeven Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Signed-off-by: Damien Le Moal --- drivers/ata/pata_falcon.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c index 121635aa8c00..823c88622e34 100644 --- a/drivers/ata/pata_falcon.c +++ b/drivers/ata/pata_falcon.c @@ -55,14 +55,14 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc, /* Transfer multiple of 2 bytes */ if (rw == READ) { if (swap) - raw_insw_swapw((u16 *)data_addr, (u16 *)buf, words); + raw_insw_swapw(data_addr, (u16 *)buf, words); else - raw_insw((u16 *)data_addr, (u16 *)buf, words); + raw_insw(data_addr, (u16 *)buf, words); } else { if (swap) - raw_outsw_swapw((u16 *)data_addr, (u16 *)buf, words); + raw_outsw_swapw(data_addr, (u16 *)buf, words); else - raw_outsw((u16 *)data_addr, (u16 *)buf, words); + raw_outsw(data_addr, (u16 *)buf, words); } /* Transfer trailing byte, if any. */ @@ -74,16 +74,16 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc, if (rw == READ) { if (swap) - raw_insw_swapw((u16 *)data_addr, (u16 *)pad, 1); + raw_insw_swapw(data_addr, (u16 *)pad, 1); else - raw_insw((u16 *)data_addr, (u16 *)pad, 1); + raw_insw(data_addr, (u16 *)pad, 1); *buf = pad[0]; } else { pad[0] = *buf; if (swap) - raw_outsw_swapw((u16 *)data_addr, (u16 *)pad, 1); + raw_outsw_swapw(data_addr, (u16 *)pad, 1); else - raw_outsw((u16 *)data_addr, (u16 *)pad, 1); + raw_outsw(data_addr, (u16 *)pad, 1); } words++; } From 6c8ad7e8cf29eb55836e7a0215f967746ab2b504 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 26 Nov 2021 10:03:06 +0800 Subject: [PATCH 046/253] sata_fsl: fix UAF in sata_fsl_port_stop when rmmod sata_fsl When the `rmmod sata_fsl.ko` command is executed in the PPC64 GNU/Linux, a bug is reported: ================================================================== BUG: Unable to handle kernel data access on read at 0x80000800805b502c Oops: Kernel access of bad area, sig: 11 [#1] NIP [c0000000000388a4] .ioread32+0x4/0x20 LR [80000000000c6034] .sata_fsl_port_stop+0x44/0xe0 [sata_fsl] Call Trace: .free_irq+0x1c/0x4e0 (unreliable) .ata_host_stop+0x74/0xd0 [libata] .release_nodes+0x330/0x3f0 .device_release_driver_internal+0x178/0x2c0 .driver_detach+0x64/0xd0 .bus_remove_driver+0x70/0xf0 .driver_unregister+0x38/0x80 .platform_driver_unregister+0x14/0x30 .fsl_sata_driver_exit+0x18/0xa20 [sata_fsl] .__se_sys_delete_module+0x1ec/0x2d0 .system_call_exception+0xfc/0x1f0 system_call_common+0xf8/0x200 ================================================================== The triggering of the BUG is shown in the following stack: driver_detach device_release_driver_internal __device_release_driver drv->remove(dev) --> platform_drv_remove/platform_remove drv->remove(dev) --> sata_fsl_remove iounmap(host_priv->hcr_base); <---- unmap kfree(host_priv); <---- free devres_release_all release_nodes dr->node.release(dev, dr->data) --> ata_host_stop ap->ops->port_stop(ap) --> sata_fsl_port_stop ioread32(hcr_base + HCONTROL) <---- UAF host->ops->host_stop(host) The iounmap(host_priv->hcr_base) and kfree(host_priv) functions should not be executed in drv->remove. These functions should be executed in host_stop after port_stop. Therefore, we move these functions to the new function sata_fsl_host_stop and bind the new function to host_stop. Fixes: faf0b2e5afe7 ("drivers/ata: add support to Freescale 3.0Gbps SATA Controller") Cc: stable@vger.kernel.org Reported-by: Hulk Robot Signed-off-by: Baokun Li Reviewed-by: Sergei Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/sata_fsl.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index e5838b23c9e0..2eb216792695 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1394,6 +1394,14 @@ static int sata_fsl_init_controller(struct ata_host *host) return 0; } +static void sata_fsl_host_stop(struct ata_host *host) +{ + struct sata_fsl_host_priv *host_priv = host->private_data; + + iounmap(host_priv->hcr_base); + kfree(host_priv); +} + /* * scsi mid-layer and libata interface structures */ @@ -1426,6 +1434,8 @@ static struct ata_port_operations sata_fsl_ops = { .port_start = sata_fsl_port_start, .port_stop = sata_fsl_port_stop, + .host_stop = sata_fsl_host_stop, + .pmp_attach = sata_fsl_pmp_attach, .pmp_detach = sata_fsl_pmp_detach, }; @@ -1558,8 +1568,6 @@ static int sata_fsl_remove(struct platform_device *ofdev) ata_host_detach(host); irq_dispose_mapping(host_priv->irq); - iounmap(host_priv->hcr_base); - kfree(host_priv); return 0; } From 6f48394cf1f3e8486591ad98c11cdadb8f1ef2ad Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 26 Nov 2021 10:03:07 +0800 Subject: [PATCH 047/253] sata_fsl: fix warning in remove_proc_entry when rmmod sata_fsl Trying to remove the fsl-sata module in the PPC64 GNU/Linux leads to the following warning: ------------[ cut here ]------------ remove_proc_entry: removing non-empty directory 'irq/69', leaking at least 'fsl-sata[ff0221000.sata]' WARNING: CPU: 3 PID: 1048 at fs/proc/generic.c:722 .remove_proc_entry+0x20c/0x220 IRQMASK: 0 NIP [c00000000033826c] .remove_proc_entry+0x20c/0x220 LR [c000000000338268] .remove_proc_entry+0x208/0x220 Call Trace: .remove_proc_entry+0x208/0x220 (unreliable) .unregister_irq_proc+0x104/0x140 .free_desc+0x44/0xb0 .irq_free_descs+0x9c/0xf0 .irq_dispose_mapping+0x64/0xa0 .sata_fsl_remove+0x58/0xa0 [sata_fsl] .platform_drv_remove+0x40/0x90 .device_release_driver_internal+0x160/0x2c0 .driver_detach+0x64/0xd0 .bus_remove_driver+0x70/0xf0 .driver_unregister+0x38/0x80 .platform_driver_unregister+0x14/0x30 .fsl_sata_driver_exit+0x18/0xa20 [sata_fsl] ---[ end trace 0ea876d4076908f5 ]--- The driver creates the mapping by calling irq_of_parse_and_map(), so it also has to dispose the mapping. But the easy way out is to simply use platform_get_irq() instead of irq_of_parse_map(). Also we should adapt return value checking and propagate error values. In this case the mapping is not managed by the device but by the of core, so the device has not to dispose the mapping. Fixes: faf0b2e5afe7 ("drivers/ata: add support to Freescale 3.0Gbps SATA Controller") Cc: stable@vger.kernel.org Reported-by: Hulk Robot Signed-off-by: Baokun Li Reviewed-by: Sergei Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/sata_fsl.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 2eb216792695..3b31a4f596d8 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1490,9 +1490,9 @@ static int sata_fsl_probe(struct platform_device *ofdev) host_priv->ssr_base = ssr_base; host_priv->csr_base = csr_base; - irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (!irq) { - dev_err(&ofdev->dev, "invalid irq from platform\n"); + irq = platform_get_irq(ofdev, 0); + if (irq < 0) { + retval = irq; goto error_exit_with_cleanup; } host_priv->irq = irq; @@ -1567,8 +1567,6 @@ static int sata_fsl_remove(struct platform_device *ofdev) ata_host_detach(host); - irq_dispose_mapping(host_priv->irq); - return 0; } From 0c21d02ca469574d2082379db52d1a27b99eed0c Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 20 Sep 2021 17:21:29 +0200 Subject: [PATCH 048/253] i2c: stm32f7: flush TX FIFO upon transfer errors While handling an error during transfer (ex: NACK), it could happen that the driver has already written data into TXDR before the transfer get stopped. This commit add TXDR Flush after end of transfer in case of error to avoid sending a wrong data on any other slave upon next transfer. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index b9b19a2a2ffa..ed977b6f7ab6 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1696,6 +1696,16 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap, time_left = wait_for_completion_timeout(&i2c_dev->complete, i2c_dev->adap.timeout); ret = f7_msg->result; + if (ret) { + /* + * It is possible that some unsent data have already been + * written into TXDR. To avoid sending old data in a + * further transfer, flush TXDR in case of any error + */ + writel_relaxed(STM32F7_I2C_ISR_TXE, + i2c_dev->base + STM32F7_I2C_ISR); + goto pm_free; + } if (!time_left) { dev_dbg(i2c_dev->dev, "Access to slave 0x%x timed out\n", @@ -1744,8 +1754,16 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, timeout = wait_for_completion_timeout(&i2c_dev->complete, i2c_dev->adap.timeout); ret = f7_msg->result; - if (ret) + if (ret) { + /* + * It is possible that some unsent data have already been + * written into TXDR. To avoid sending old data in a + * further transfer, flush TXDR in case of any error + */ + writel_relaxed(STM32F7_I2C_ISR_TXE, + i2c_dev->base + STM32F7_I2C_ISR); goto pm_free; + } if (!timeout) { dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr); From 07fb78a78de4e67b5d6d5407aeee1250a327a698 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 26 Nov 2021 16:43:42 +0100 Subject: [PATCH 049/253] spi: spi-rockchip: Add rk3568-spi compatible This adds a compatible string for the SPI controller found on the RK3566 and RK3568 SoCs. Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20211126154344.724316-2-frattaroli.nicolas@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 7f987e79337c..52a78a2e362e 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -33,6 +33,7 @@ properties: - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi + - rockchip,rk3568-spi - rockchip,rv1126-spi - const: rockchip,rk3066-spi From e3f9387aea67742b9d1f4de8e5bb2fd08a8a4584 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 29 Nov 2021 19:00:43 +0900 Subject: [PATCH 050/253] loop: Use pr_warn_once() for loop_control_remove() warning kernel test robot reported that RCU stall via printk() flooding is possible [1] when stress testing. Link: https://lkml.kernel.org/r/20211129073709.GA18483@xsang-OptiPlex-9020 [1] Reported-by: kernel test robot Reviewed-by: Christoph Hellwig Signed-off-by: Tetsuo Handa Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a154cab6cd98..c3a36cfaa855 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2103,7 +2103,7 @@ static int loop_control_remove(int idx) int ret; if (idx < 0) { - pr_warn("deleting an unspecified loop device is not supported.\n"); + pr_warn_once("deleting an unspecified loop device is not supported.\n"); return -EINVAL; } From 0c980a006d3fbee86c4d0698f66d6f5381831787 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 17 Nov 2021 10:45:22 +0100 Subject: [PATCH 051/253] drm/vc4: kms: Wait for the commit before increasing our clock rate Several DRM/KMS atomic commits can run in parallel if they affect different CRTC. These commits share the global HVS state, so we have some code to make sure we run commits in sequence. This synchronization code is one of the first thing that runs in vc4_atomic_commit_tail(). Another constraints we have is that we need to make sure the HVS clock gets a boost during the commit. That code relies on clk_set_min_rate and will remove the old minimum and set a new one. We also need another, temporary, minimum for the duration of the commit. The algorithm is thus to set a temporary minimum, drop the previous one, do the commit, and finally set the minimum for the current mode. However, the part that sets the temporary minimum and drops the older one runs before the commit synchronization code. Thus, under the proper conditions, we can end up mixing up the minimums and ending up with the wrong one for our current step. To avoid it, let's move the clock setup in the protected section. Fixes: d7d96c00e585 ("drm/vc4: hvs: Boost the core clock during modeset") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20211117094527.146275-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index f0b3e4cf5bce..764ddb41a4ce 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -353,9 +353,6 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) vc4_hvs_mask_underrun(dev, vc4_crtc_state->assigned_channel); } - if (vc4->hvs->hvs5) - clk_set_min_rate(hvs->core_clk, 500000000); - old_hvs_state = vc4_hvs_get_old_global_state(state); if (!old_hvs_state) return; @@ -377,6 +374,9 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) drm_err(dev, "Timed out waiting for commit\n"); } + if (vc4->hvs->hvs5) + clk_set_min_rate(hvs->core_clk, 500000000); + drm_atomic_helper_commit_modeset_disables(dev, state); vc4_ctm_commit(vc4, state); From f927767978d201d4ac023fcd797adbb963a6565d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 17 Nov 2021 10:45:23 +0100 Subject: [PATCH 052/253] drm/vc4: kms: Fix return code check The HVS global state functions return an error pointer, but in most cases we check if it's NULL, possibly resulting in an invalid pointer dereference. Fixes: 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20211117094527.146275-3-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 764ddb41a4ce..3f780c195749 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -354,7 +354,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) } old_hvs_state = vc4_hvs_get_old_global_state(state); - if (!old_hvs_state) + if (IS_ERR(old_hvs_state)) return; for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { @@ -410,8 +410,8 @@ static int vc4_atomic_commit_setup(struct drm_atomic_state *state) unsigned int i; hvs_state = vc4_hvs_get_new_global_state(state); - if (!hvs_state) - return -EINVAL; + if (WARN_ON(IS_ERR(hvs_state))) + return PTR_ERR(hvs_state); for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct vc4_crtc_state *vc4_crtc_state = @@ -762,8 +762,8 @@ static int vc4_pv_muxing_atomic_check(struct drm_device *dev, unsigned int i; hvs_new_state = vc4_hvs_get_global_state(state); - if (!hvs_new_state) - return -EINVAL; + if (IS_ERR(hvs_new_state)) + return PTR_ERR(hvs_new_state); for (i = 0; i < ARRAY_SIZE(hvs_new_state->fifo_state); i++) if (!hvs_new_state->fifo_state[i].in_use) From 049cfff8d53a30cae3349ff71a4c01b7d9981bc2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 17 Nov 2021 10:45:24 +0100 Subject: [PATCH 053/253] drm/vc4: kms: Add missing drm_crtc_commit_put Commit 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") introduced a global state for the HVS, with each FIFO storing the current CRTC commit so that we can properly synchronize commits. However, the refcounting was off and we thus ended up leaking the drm_crtc_commit structure every commit. Add a drm_crtc_commit_put to prevent the leakage. Fixes: 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20211117094527.146275-4-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 3f780c195749..7c1d0c3beba2 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -361,6 +361,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(old_crtc_state); unsigned int channel = vc4_crtc_state->assigned_channel; + struct drm_crtc_commit *commit; int ret; if (channel == VC4_HVS_CHANNEL_DISABLED) @@ -369,9 +370,15 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) if (!old_hvs_state->fifo_state[channel].in_use) continue; - ret = drm_crtc_commit_wait(old_hvs_state->fifo_state[channel].pending_commit); + commit = old_hvs_state->fifo_state[channel].pending_commit; + if (!commit) + continue; + + ret = drm_crtc_commit_wait(commit); if (ret) drm_err(dev, "Timed out waiting for commit\n"); + + drm_crtc_commit_put(commit); } if (vc4->hvs->hvs5) From d134c5ff71c7f2320fc7997f2fbbdedf0c76889a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 17 Nov 2021 10:45:25 +0100 Subject: [PATCH 054/253] drm/vc4: kms: Clear the HVS FIFO commit pointer once done Commit 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") introduced a wait on the previous commit done on a given HVS FIFO. However, we never cleared that pointer once done. Since drm_crtc_commit_put can free the drm_crtc_commit structure directly if we were the last user, this means that it can lead to a use-after free if we were to duplicate the state, and that stale pointer would even be copied to the new state. Set the pointer to NULL once we're done with the wait so that we don't carry over a pointer to a free'd structure. Fixes: 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20211117094527.146275-5-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 7c1d0c3beba2..f80370e87e98 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -379,6 +379,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) drm_err(dev, "Timed out waiting for commit\n"); drm_crtc_commit_put(commit); + old_hvs_state->fifo_state[channel].pending_commit = NULL; } if (vc4->hvs->hvs5) From d354699e2292c60f25496d3c31ce4e7b1563b899 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 17 Nov 2021 10:45:26 +0100 Subject: [PATCH 055/253] drm/vc4: kms: Don't duplicate pending commit Our HVS global state, when duplicated, will also copy the pointer to the drm_crtc_commit (and increase the reference count) for each FIFO if the pointer is not NULL. However, our atomic_setup function will overwrite that pointer without putting the reference back leading to a memory leak. Since the commit is only relevant during the atomic commit process, it doesn't make sense to duplicate the reference to the commit anyway. Let's remove it. Fixes: 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20211117094527.146275-6-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index f80370e87e98..d9b3e3ad71ea 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -676,12 +676,6 @@ vc4_hvs_channels_duplicate_state(struct drm_private_obj *obj) for (i = 0; i < HVS_NUM_CHANNELS; i++) { state->fifo_state[i].in_use = old_state->fifo_state[i].in_use; - - if (!old_state->fifo_state[i].pending_commit) - continue; - - state->fifo_state[i].pending_commit = - drm_crtc_commit_get(old_state->fifo_state[i].pending_commit); } return &state->base; From 6052a3110be208e547a4a8aeb184446199a16e8a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 17 Nov 2021 10:45:27 +0100 Subject: [PATCH 056/253] drm/vc4: kms: Fix previous HVS commit wait Our current code is supposed to serialise the commits by waiting for all the drm_crtc_commits associated to the previous HVS state. However, assuming we have two CRTCs running and being configured and we configure each one alternately, we end up in a situation where we're not waiting at all. Indeed, starting with a state (state 0) where both CRTCs are running, and doing a commit (state 1) on the first CRTC (CRTC 0), we'll associate its commit to its assigned FIFO in vc4_hvs_state. If we get a new commit (state 2), this time affecting the second CRTC (CRTC 1), the DRM core will allow both commits to execute in parallel (assuming they don't have any share resources). Our code in vc4_atomic_commit_tail is supposed to make sure we only get one commit at a time and serialised by order of submission. It does so by using for_each_old_crtc_in_state, making sure that the CRTC has a FIFO assigned, is used, and has a commit pending. If it does, then we'll wait for the commit before going forward. During the transition from state 0 to state 1, as our old CRTC state we get the CRTC 0 state 0, its commit, we wait for it, everything works fine. During the transition from state 1 to state 2 though, the use of for_each_old_crtc_in_state is wrong. Indeed, while the code assumes it's returning the state of the CRTC in the old state (so CRTC 0 state 1), it actually returns the old state of the CRTC affected by the current commit, so CRTC 0 state 0 since it wasn't part of state 1. Due to this, if we alternate between the configuration of CRTC 0 and CRTC 1, we never actually wait for anything since we should be waiting on the other every time, but it never is affected by the previous commit. Change the logic to, at every commit, look at every FIFO in the previous HVS state, and if it's in use and has a commit associated to it, wait for that commit. Fixes: 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20211117094527.146275-7-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index d9b3e3ad71ea..b61792d2aa65 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -337,10 +337,10 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_hvs *hvs = vc4->hvs; - struct drm_crtc_state *old_crtc_state; struct drm_crtc_state *new_crtc_state; struct drm_crtc *crtc; struct vc4_hvs_state *old_hvs_state; + unsigned int channel; int i; for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { @@ -357,16 +357,10 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) if (IS_ERR(old_hvs_state)) return; - for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { - struct vc4_crtc_state *vc4_crtc_state = - to_vc4_crtc_state(old_crtc_state); - unsigned int channel = vc4_crtc_state->assigned_channel; + for (channel = 0; channel < HVS_NUM_CHANNELS; channel++) { struct drm_crtc_commit *commit; int ret; - if (channel == VC4_HVS_CHANNEL_DISABLED) - continue; - if (!old_hvs_state->fifo_state[channel].in_use) continue; From 61e29a0956bdb09eac8aca7d9add9f902baff08b Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 5 Nov 2021 14:33:38 -0400 Subject: [PATCH 057/253] drm/i915: Add support for panels with VESA backlights with PWM enable/disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simply adds proper support for panel backlights that can be controlled via VESA's backlight control protocol, but which also require that we enable and disable the backlight via PWM instead of via the DPCD interface. We also enable this by default, in order to fix some people's backlights that were broken by not having this enabled. For reference, backlights that require this and use VESA's backlight interface tend to be laptops with hybrid GPUs, but this very well may change in the future. v4: * Make sure that we call intel_backlight_level_to_pwm() in intel_dp_aux_vesa_enable_backlight() - vsyrjala Signed-off-by: Lyude Paul Link: https://gitlab.freedesktop.org/drm/intel/-/issues/3680 Fixes: fe7d52bccab6 ("drm/i915/dp: Don't use DPCD backlights that need PWM enable/disable") Reviewed-by: Ville Syrjälä Cc: # v5.12+ Link: https://patchwork.freedesktop.org/patch/msgid/20211105183342.130810-2-lyude@redhat.com (cherry picked from commit 04f0d6cc62cc1eaf9242c081520c024a17ba86a3) Signed-off-by: Rodrigo Vivi --- .../drm/i915/display/intel_dp_aux_backlight.c | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 569d17b4d00f..f05b71c01b8e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -293,6 +293,13 @@ intel_dp_aux_vesa_enable_backlight(const struct intel_crtc_state *crtc_state, struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); + if (!panel->backlight.edp.vesa.info.aux_enable) { + u32 pwm_level = intel_backlight_invert_pwm_level(connector, + panel->backlight.pwm_level_max); + + panel->backlight.pwm_funcs->enable(crtc_state, conn_state, pwm_level); + } + drm_edp_backlight_enable(&intel_dp->aux, &panel->backlight.edp.vesa.info, level); } @@ -304,6 +311,10 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info); + + if (!panel->backlight.edp.vesa.info.aux_enable) + panel->backlight.pwm_funcs->disable(old_conn_state, + intel_backlight_invert_pwm_level(connector, 0)); } static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe) @@ -321,6 +332,15 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, if (ret < 0) return ret; + if (!panel->backlight.edp.vesa.info.aux_enable) { + ret = panel->backlight.pwm_funcs->setup(connector, pipe); + if (ret < 0) { + drm_err(&i915->drm, + "Failed to setup PWM backlight controls for eDP backlight: %d\n", + ret); + return ret; + } + } panel->backlight.max = panel->backlight.edp.vesa.info.max; panel->backlight.min = 0; if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { @@ -340,12 +360,7 @@ intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct drm_i915_private *i915 = dp_to_i915(intel_dp); - /* TODO: We currently only support AUX only backlight configurations, not backlights which - * require a mix of PWM and AUX controls to work. In the mean time, these machines typically - * work just fine using normal PWM controls anyway. - */ - if ((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && - drm_edp_backlight_supported(intel_dp->edp_dpcd)) { + if (drm_edp_backlight_supported(intel_dp->edp_dpcd)) { drm_dbg_kms(&i915->drm, "AUX Backlight Control Supported!\n"); return true; } From d69dab7de208748ddf79143b39d98db55eee9b4a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 27 Nov 2021 10:14:49 +0100 Subject: [PATCH 058/253] docs: conf.py: fix support for Readthedocs v 1.0.0 As described at: https://stackoverflow.com/questions/23211695/modifying-content-width-of-the-sphinx-theme-read-the-docs since Sphinx 1.8, the standard way to setup a custom theme is to use html_css_files. While using html_context is OK with RTD 0.5.2, it doesn't work with 1.0.0, causing the theme to not load, producing a very weird html. Tested with: - Sphinx 1.7.9 + sphinx-rtd-theme 0.5.2 - Sphinx 2.4.4 + sphinx-rtd-theme 0.5.2 - Sphinx 2.4.4 + sphinx-rtd-theme 1.0.0 - Sphinx 4.3.0 + sphinx-rtd-theme 1.0.0 Reported-by: Hans Verkuil Tested-by: Hans Verkuil Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Tested-by: Akira Yokosawa Link: https://lore.kernel.org/r/80009f0d17ea0840d81e7e16fff6e7677919fdfc.1638004294.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index 17f7cee56987..76e5eb5cb62b 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -249,11 +249,16 @@ except ImportError: html_static_path = ['sphinx-static'] -html_context = { - 'css_files': [ - '_static/theme_overrides.css', - ], -} +html_css_files = [ + 'theme_overrides.css', +] + +if major <= 1 and minor < 8: + html_context = { + 'css_files': [ + '_static/theme_overrides.css', + ], + } # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied From 5c81691bb6461a474ac9d6ad5737c12e8f558a8b Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Fri, 26 Nov 2021 13:11:17 +0900 Subject: [PATCH 059/253] docs: admin-guide/blockdev: Remove digraph of node-states While node-states-8.dot has two digraphs, the dot(1) command can not properly handle multiple graphs in a DOT file and the kernel-doc page at https://www.kernel.org/doc/html/latest/admin-guide/blockdev/drbd/figures.html fails to render the graphs. It turned out that the digraph of node_states can be removed. Quote from Joel's reflection: On reflection, the digraph node_states can be removed entirely. It is too basic to contain any useful information. In addition it references "ioctl_set_state". The ioctl configuration interface for DRBD has long been removed. In fact, it was never in the upstream version of DRBD. Remove node_states and rename the DOT file peer_states-8.dot. Suggested-by: Joel Colledge Acked-by: Joel Colledge Signed-off-by: Akira Yokosawa Cc: Philipp Reisner Cc: Lars Ellenberg Link: https://lore.kernel.org/r/7df04f45-8746-e666-1a9d-a998f1ab1f91@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/blockdev/drbd/figures.rst | 4 ++-- .../blockdev/drbd/{node-states-8.dot => peer-states-8.dot} | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) rename Documentation/admin-guide/blockdev/drbd/{node-states-8.dot => peer-states-8.dot} (71%) diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst index bd9a4901fe46..9f73253ea353 100644 --- a/Documentation/admin-guide/blockdev/drbd/figures.rst +++ b/Documentation/admin-guide/blockdev/drbd/figures.rst @@ -25,6 +25,6 @@ Sub graphs of DRBD's state transitions :alt: disk-states-8.dot :align: center -.. kernel-figure:: node-states-8.dot - :alt: node-states-8.dot +.. kernel-figure:: peer-states-8.dot + :alt: peer-states-8.dot :align: center diff --git a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot similarity index 71% rename from Documentation/admin-guide/blockdev/drbd/node-states-8.dot rename to Documentation/admin-guide/blockdev/drbd/peer-states-8.dot index bfa54e1f8016..6dc3954954d6 100644 --- a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot +++ b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot @@ -1,8 +1,3 @@ -digraph node_states { - Secondary -> Primary [ label = "ioctl_set_state()" ] - Primary -> Secondary [ label = "ioctl_set_state()" ] -} - digraph peer_states { Secondary -> Primary [ label = "recv state packet" ] Primary -> Secondary [ label = "recv state packet" ] From aa9b5e0df226edbf1879cb8f17a409cc3fd89c9d Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Fri, 19 Nov 2021 21:07:58 +0100 Subject: [PATCH 060/253] Documentation/process: fix self reference Instead link to the device tree document with the same name. Signed-off-by: Erik Ekman Link: https://lore.kernel.org/r/20211119200758.642474-1-erik@kryo.se Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index da085d63af9b..6b3aaed66fba 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -14,7 +14,8 @@ works, see Documentation/process/development-process.rst. Also, read Documentation/process/submit-checklist.rst for a list of items to check before submitting code. If you are submitting a driver, also read Documentation/process/submitting-drivers.rst; for device -tree binding patches, read Documentation/process/submitting-patches.rst. +tree binding patches, read +Documentation/devicetree/bindings/submitting-patches.rst. This documentation assumes that you're using ``git`` to prepare your patches. If you're unfamiliar with ``git``, you would be well-advised to learn how to From 333b11e541feeb79e7cce31dd5b280ceded388e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Nov 2021 15:56:03 -0300 Subject: [PATCH 061/253] Documentation: Add minimum pahole version A report was made in https://github.com/acmel/dwarves/issues/26 about pahole not being listed in the process/changes.rst file as being needed for building the kernel, address that. Link: https://github.com/acmel/dwarves/issues/26 Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/YZPQ6+u2wTHRfR+W@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/YZfzQ0DvHD5o26Bt@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/process/changes.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index b398b8576417..cf908d79666e 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -35,6 +35,7 @@ GNU make 3.81 make --version binutils 2.23 ld -v flex 2.5.35 flex --version bison 2.0 bison --version +pahole 1.16 pahole --version util-linux 2.10o fdformat --version kmod 13 depmod -V e2fsprogs 1.41.4 e2fsck -V @@ -108,6 +109,16 @@ Bison Since Linux 4.16, the build system generates parsers during build. This requires bison 2.0 or later. +pahole: +------- + +Since Linux 5.2, if CONFIG_DEBUG_INFO_BTF is selected, the build system +generates BTF (BPF Type Format) from DWARF in vmlinux, a bit later from kernel +modules as well. This requires pahole v1.16 or later. + +It is found in the 'dwarves' or 'pahole' distro packages or from +https://fedorapeople.org/~acme/dwarves/. + Perl ---- From 7dc9fb47bc9a95f1cc6c5655341860c5e50f91d4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 24 Nov 2021 22:42:18 +0200 Subject: [PATCH 062/253] scsi: ufs: ufs-pci: Add support for Intel ADL Add PCI ID and callbacks to support Intel Alder Lake. Link: https://lore.kernel.org/r/20211124204218.1784559-1-adrian.hunter@intel.com Cc: stable@vger.kernel.org # v5.15+ Reviewed-by: Bart Van Assche Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 51424557810d..f725248ba57f 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -421,6 +421,13 @@ static int ufs_intel_lkf_init(struct ufs_hba *hba) return err; } +static int ufs_intel_adl_init(struct ufs_hba *hba) +{ + hba->nop_out_timeout = 200; + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + return ufs_intel_common_init(hba); +} + static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .name = "intel-pci", .init = ufs_intel_common_init, @@ -449,6 +456,15 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = { .device_reset = ufs_intel_device_reset, }; +static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = { + .name = "intel-pci", + .init = ufs_intel_adl_init, + .exit = ufs_intel_common_exit, + .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, + .device_reset = ufs_intel_device_reset, +}; + #ifdef CONFIG_PM_SLEEP static int ufshcd_pci_restore(struct device *dev) { @@ -563,6 +579,8 @@ static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x4B41), (kernel_ulong_t)&ufs_intel_ehl_hba_vops }, { PCI_VDEVICE(INTEL, 0x4B43), (kernel_ulong_t)&ufs_intel_ehl_hba_vops }, { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops }, + { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, { } /* terminate list */ }; From b933d1faf8fa30d16171bcff404e39c41b2a7c84 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 20 Sep 2021 17:21:30 +0200 Subject: [PATCH 063/253] i2c: stm32f7: recover the bus on access timeout When getting an access timeout, ensure that the bus is in a proper state prior to returning the error. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index ed977b6f7ab6..ad3459a3bc5e 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1712,6 +1712,7 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap, i2c_dev->msg->addr); if (i2c_dev->use_dma) dmaengine_terminate_all(dma->chan_using); + stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; } @@ -1769,6 +1770,7 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr); if (i2c_dev->use_dma) dmaengine_terminate_all(dma->chan_using); + stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; goto pm_free; } From 31b90a95ccbbb4b628578ac17e3b3cc8eeacfe31 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 20 Sep 2021 17:21:31 +0200 Subject: [PATCH 064/253] i2c: stm32f7: stop dma transfer in case of NACK In case of receiving a NACK, the dma transfer should be stopped to avoid feeding data into the FIFO. Also ensure to properly return the proper error code and avoid waiting for the end of the dma completion in case of error happening during the transmission. Fixes: 7ecc8cfde553 ("i2c: i2c-stm32f7: Add DMA support") Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index ad3459a3bc5e..50d5ae81d227 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1493,6 +1493,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) { struct stm32f7_i2c_dev *i2c_dev = data; struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; + struct stm32_i2c_dma *dma = i2c_dev->dma; void __iomem *base = i2c_dev->base; u32 status, mask; int ret = IRQ_HANDLED; @@ -1518,6 +1519,10 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n", __func__, f7_msg->addr); writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR); + if (i2c_dev->use_dma) { + stm32f7_i2c_disable_dma_req(i2c_dev); + dmaengine_terminate_all(dma->chan_using); + } f7_msg->result = -ENXIO; } @@ -1533,7 +1538,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) /* Clear STOP flag */ writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR); - if (i2c_dev->use_dma) { + if (i2c_dev->use_dma && !f7_msg->result) { ret = IRQ_WAKE_THREAD; } else { i2c_dev->master_mode = false; @@ -1546,7 +1551,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) if (f7_msg->stop) { mask = STM32F7_I2C_CR2_STOP; stm32f7_i2c_set_bits(base + STM32F7_I2C_CR2, mask); - } else if (i2c_dev->use_dma) { + } else if (i2c_dev->use_dma && !f7_msg->result) { ret = IRQ_WAKE_THREAD; } else if (f7_msg->smbus) { stm32f7_i2c_smbus_rep_start(i2c_dev); From 1229f82deaece681cda664d95c856b68062aa159 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 20 Sep 2021 17:21:32 +0200 Subject: [PATCH 065/253] i2c: stm32f7: use proper DMAENGINE API for termination dmaengine_terminate_all() is deprecated in favor of explicitly saying if it should be sync or async. Here, we use dmaengine_terminate_sync in i2c_xfer and i2c_smbus_xfer handlers and rely on dmaengine_terminate_async within interrupt handlers (transmission error cases). dmaengine_synchronize is added within i2c_xfer and i2c_smbus_xfer handler to finalize terminate started in interrupt handlers. Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 50d5ae81d227..66145d2b9b55 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1521,7 +1521,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR); if (i2c_dev->use_dma) { stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_all(dma->chan_using); + dmaengine_terminate_async(dma->chan_using); } f7_msg->result = -ENXIO; } @@ -1588,7 +1588,7 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) if (!ret) { dev_dbg(i2c_dev->dev, "<%s>: Timed out\n", __func__); stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_all(dma->chan_using); + dmaengine_terminate_async(dma->chan_using); f7_msg->result = -ETIMEDOUT; } @@ -1665,7 +1665,7 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data) /* Disable dma */ if (i2c_dev->use_dma) { stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_all(dma->chan_using); + dmaengine_terminate_async(dma->chan_using); } i2c_dev->master_mode = false; @@ -1702,6 +1702,9 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap, i2c_dev->adap.timeout); ret = f7_msg->result; if (ret) { + if (i2c_dev->use_dma) + dmaengine_synchronize(dma->chan_using); + /* * It is possible that some unsent data have already been * written into TXDR. To avoid sending old data in a @@ -1716,7 +1719,7 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap, dev_dbg(i2c_dev->dev, "Access to slave 0x%x timed out\n", i2c_dev->msg->addr); if (i2c_dev->use_dma) - dmaengine_terminate_all(dma->chan_using); + dmaengine_terminate_sync(dma->chan_using); stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; } @@ -1761,6 +1764,9 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, i2c_dev->adap.timeout); ret = f7_msg->result; if (ret) { + if (i2c_dev->use_dma) + dmaengine_synchronize(dma->chan_using); + /* * It is possible that some unsent data have already been * written into TXDR. To avoid sending old data in a @@ -1774,7 +1780,7 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, if (!timeout) { dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr); if (i2c_dev->use_dma) - dmaengine_terminate_all(dma->chan_using); + dmaengine_terminate_sync(dma->chan_using); stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; goto pm_free; From 6a631c0432dcccbcf45839016a07c015e335e9ae Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 27 Nov 2021 17:31:59 +0100 Subject: [PATCH 066/253] Documentation/locking/locktypes: Update migrate_disable() bits. The initial implementation of migrate_disable() for mainline was a wrapper around preempt_disable(). RT kernels substituted this with a real migrate disable implementation. Later on mainline gained true migrate disable support, but the documentation was not updated. Update the documentation, remove the claims about migrate_disable() mapping to preempt_disable() on non-PREEMPT_RT kernels. Fixes: 74d862b682f51 ("sched: Make migrate_disable/enable() independent of RT") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211127163200.10466-2-bigeasy@linutronix.de --- Documentation/locking/locktypes.rst | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index ddada4a53749..4fd7b70fcde1 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -439,11 +439,9 @@ preemption. The following substitution works on both kernels:: spin_lock(&p->lock); p->count += this_cpu_read(var2); -On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable() -which makes the above code fully equivalent. On a PREEMPT_RT kernel migrate_disable() ensures that the task is pinned on the current CPU which in turn guarantees that the per-CPU access to var1 and var2 are staying on -the same CPU. +the same CPU while the task remains preemptible. The migrate_disable() substitution is not valid for the following scenario:: @@ -456,9 +454,8 @@ scenario:: p = this_cpu_ptr(&var1); p->val = func2(); -While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because -here migrate_disable() does not protect against reentrancy from a -preempting task. A correct substitution for this case is:: +This breaks because migrate_disable() does not protect against reentrancy from +a preempting task. A correct substitution for this case is:: func() { From 79364031c5b4365ca28ac0fa00acfab5bf465be1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 27 Nov 2021 17:32:00 +0100 Subject: [PATCH 067/253] bpf: Make sure bpf_disable_instrumentation() is safe vs preemption. The initial implementation of migrate_disable() for mainline was a wrapper around preempt_disable(). RT kernels substituted this with a real migrate disable implementation. Later on mainline gained true migrate disable support, but neither documentation nor affected code were updated. Remove stale comments claiming that migrate_disable() is PREEMPT_RT only. Don't use __this_cpu_inc() in the !PREEMPT_RT path because preemption is not disabled and the RMW operation can be preempted. Fixes: 74d862b682f51 ("sched: Make migrate_disable/enable() independent of RT") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211127163200.10466-3-bigeasy@linutronix.de --- include/linux/bpf.h | 16 ++-------------- include/linux/filter.h | 3 --- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 84ff6ef49462..755f38e893be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1353,28 +1353,16 @@ extern struct mutex bpf_stats_enabled_mutex; * kprobes, tracepoints) to prevent deadlocks on map operations as any of * these events can happen inside a region which holds a map bucket lock * and can deadlock on it. - * - * Use the preemption safe inc/dec variants on RT because migrate disable - * is preemptible on RT and preemption in the middle of the RMW operation - * might lead to inconsistent state. Use the raw variants for non RT - * kernels as migrate_disable() maps to preempt_disable() so the slightly - * more expensive save operation can be avoided. */ static inline void bpf_disable_instrumentation(void) { migrate_disable(); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_inc(bpf_prog_active); - else - __this_cpu_inc(bpf_prog_active); + this_cpu_inc(bpf_prog_active); } static inline void bpf_enable_instrumentation(void) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_dec(bpf_prog_active); - else - __this_cpu_dec(bpf_prog_active); + this_cpu_dec(bpf_prog_active); migrate_enable(); } diff --git a/include/linux/filter.h b/include/linux/filter.h index 24b7ed2677af..534f678ca50f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -640,9 +640,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void * This uses migrate_disable/enable() explicitly to document that the * invocation of a BPF program does not require reentrancy protection * against a BPF program which is invoked from a preempting task. - * - * For non RT enabled kernels migrate_disable/enable() maps to - * preempt_disable/enable(), i.e. it disables also preemption. */ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, const void *ctx) From 49201b90af818654c5506a0decc18e111eadcb66 Mon Sep 17 00:00:00 2001 From: Fabrizio Bertocci Date: Mon, 29 Nov 2021 23:15:40 -0500 Subject: [PATCH 068/253] platform/x86: amd-pmc: Fix s2idle failures on certain AMD laptops On some AMD hardware laptops, the system fails communicating with the PMC when entering s2idle and the machine is battery powered. Hardware description: HP Pavilion Aero Laptop 13-be0097nr CPU: AMD Ryzen 7 5800U with Radeon Graphics GPU: 03:00.0 VGA compatible controller [0300]: Advanced Micro Devices, Inc. [AMD/ATI] Device [1002:1638] (rev c1) Detailed description of the problem (and investigation) here: https://gitlab.freedesktop.org/drm/amd/-/issues/1799 Patch is a single line: reduce the polling delay in half, from 100uSec to 50uSec when waiting for a change in state from the PMC after a write command operation. After changing the delay, I did not see a single failure on this machine (I have this fix for now more than one week and s2idle worked every single time on battery power). Cc: stable@vger.kernel.org Acked-by: Shyam Sundar S K Signed-off-by: Fabrizio Bertocci Link: https://lore.kernel.org/r/CADtzkx7TdfbwtaVEXUdD6YXPey52E-nZVQNs+Z41DTx7gqMqtw@mail.gmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index b7e50ed050a8..841c44cd64c2 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -76,7 +76,7 @@ #define AMD_CPU_ID_CZN AMD_CPU_ID_RN #define AMD_CPU_ID_YC 0x14B5 -#define PMC_MSG_DELAY_MIN_US 100 +#define PMC_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 #define SOC_SUBSYSTEM_IP_MAX 12 From 099f83aa2d06680d5987e43fed1afeda14b5037e Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 30 Nov 2021 17:08:24 +0100 Subject: [PATCH 069/253] mips, bpf: Fix reference to non-existing Kconfig symbol The Kconfig symbol for R10000 ll/sc errata workaround in the MIPS JIT was misspelled, causing the workaround to not take effect when enabled. Fixes: 72570224bb8f ("mips, bpf: Add JIT workarounds for CPU errata") Reported-by: Lukas Bulwahn Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211130160824.3781635-1-johan.almbladh@anyfinetworks.com --- arch/mips/net/bpf_jit_comp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/net/bpf_jit_comp.h b/arch/mips/net/bpf_jit_comp.h index 6f3a7b07294b..a37fe20818eb 100644 --- a/arch/mips/net/bpf_jit_comp.h +++ b/arch/mips/net/bpf_jit_comp.h @@ -98,7 +98,7 @@ do { \ #define emit(...) __emit(__VA_ARGS__) /* Workaround for R10000 ll/sc errata */ -#ifdef CONFIG_WAR_R10000 +#ifdef CONFIG_WAR_R10000_LLSC #define LLSC_beqz beqzl #else #define LLSC_beqz beqz From 3b9a2d57930372fac8cc0291ed5cdbd443542ed6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 10 Nov 2021 15:19:40 -0800 Subject: [PATCH 070/253] vfio: remove all kernel-doc notation vfio.c abuses (misuses) "/**", which indicates the beginning of kernel-doc notation in the kernel tree. This causes a bunch of kernel-doc complaints about this source file, so quieten all of them by changing all "/**" to "/*". vfio.c:236: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * IOMMU driver registration vfio.c:236: warning: missing initial short description on line: * IOMMU driver registration vfio.c:295: warning: expecting prototype for Container objects(). Prototype was for vfio_container_get() instead vfio.c:317: warning: expecting prototype for Group objects(). Prototype was for __vfio_group_get_from_iommu() instead vfio.c:496: warning: Function parameter or member 'device' not described in 'vfio_device_put' vfio.c:496: warning: expecting prototype for Device objects(). Prototype was for vfio_device_put() instead vfio.c:599: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Async device support vfio.c:599: warning: missing initial short description on line: * Async device support vfio.c:693: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * VFIO driver API vfio.c:693: warning: missing initial short description on line: * VFIO driver API vfio.c:835: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Get a reference to the vfio_device for a device. Even if the vfio.c:835: warning: missing initial short description on line: * Get a reference to the vfio_device for a device. Even if the vfio.c:969: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * VFIO base fd, /dev/vfio/vfio vfio.c:969: warning: missing initial short description on line: * VFIO base fd, /dev/vfio/vfio vfio.c:1187: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * VFIO Group fd, /dev/vfio/$GROUP vfio.c:1187: warning: missing initial short description on line: * VFIO Group fd, /dev/vfio/$GROUP vfio.c:1540: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * VFIO Device fd vfio.c:1540: warning: missing initial short description on line: * VFIO Device fd vfio.c:1615: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * External user API, exported by symbols to be linked dynamically. vfio.c:1615: warning: missing initial short description on line: * External user API, exported by symbols to be linked dynamically. vfio.c:1663: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * External user API, exported by symbols to be linked dynamically. vfio.c:1663: warning: missing initial short description on line: * External user API, exported by symbols to be linked dynamically. vfio.c:1742: warning: Function parameter or member 'caps' not described in 'vfio_info_cap_add' vfio.c:1742: warning: Function parameter or member 'size' not described in 'vfio_info_cap_add' vfio.c:1742: warning: Function parameter or member 'id' not described in 'vfio_info_cap_add' vfio.c:1742: warning: Function parameter or member 'version' not described in 'vfio_info_cap_add' vfio.c:1742: warning: expecting prototype for Sub(). Prototype was for vfio_info_cap_add() instead vfio.c:2276: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Module/class support vfio.c:2276: warning: missing initial short description on line: * Module/class support Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Jason Gunthorpe Cc: Alex Williamson Cc: Eric Auger Cc: Cornelia Huck Cc: kvm@vger.kernel.org Link: https://lore.kernel.org/r/38a9cb92-a473-40bf-b8f9-85cc5cfc2da4@infradead.org Reviewed-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 82fb75464f92..735d1d344af9 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -232,7 +232,7 @@ static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, } #endif /* CONFIG_VFIO_NOIOMMU */ -/** +/* * IOMMU driver registration */ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) @@ -285,7 +285,7 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, unsigned long action, void *data); static void vfio_group_get(struct vfio_group *group); -/** +/* * Container objects - containers are created when /dev/vfio/vfio is * opened, but their lifecycle extends until the last user is done, so * it's freed via kref. Must support container/group/device being @@ -309,7 +309,7 @@ static void vfio_container_put(struct vfio_container *container) kref_put(&container->kref, vfio_container_release); } -/** +/* * Group objects - create, release, get, put, search */ static struct vfio_group * @@ -488,7 +488,7 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev) return group; } -/** +/* * Device objects - create, release, get, put, search */ /* Device reference always implies a group reference */ @@ -595,7 +595,7 @@ static int vfio_dev_viable(struct device *dev, void *data) return ret; } -/** +/* * Async device support */ static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev) @@ -689,7 +689,7 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, return NOTIFY_OK; } -/** +/* * VFIO driver API */ void vfio_init_group_dev(struct vfio_device *device, struct device *dev, @@ -831,7 +831,7 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); -/** +/* * Get a reference to the vfio_device for a device. Even if the * caller thinks they own the device, they could be racing with a * release call path, so we can't trust drvdata for the shortcut. @@ -965,7 +965,7 @@ void vfio_unregister_group_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); -/** +/* * VFIO base fd, /dev/vfio/vfio */ static long vfio_ioctl_check_extension(struct vfio_container *container, @@ -1183,7 +1183,7 @@ static const struct file_operations vfio_fops = { .compat_ioctl = compat_ptr_ioctl, }; -/** +/* * VFIO Group fd, /dev/vfio/$GROUP */ static void __vfio_group_unset_container(struct vfio_group *group) @@ -1536,7 +1536,7 @@ static const struct file_operations vfio_group_fops = { .release = vfio_group_fops_release, }; -/** +/* * VFIO Device fd */ static int vfio_device_fops_release(struct inode *inode, struct file *filep) @@ -1611,7 +1611,7 @@ static const struct file_operations vfio_device_fops = { .mmap = vfio_device_fops_mmap, }; -/** +/* * External user API, exported by symbols to be linked dynamically. * * The protocol includes: @@ -1659,7 +1659,7 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep) } EXPORT_SYMBOL_GPL(vfio_group_get_external_user); -/** +/* * External user API, exported by symbols to be linked dynamically. * The external user passes in a device pointer * to verify that: @@ -1725,7 +1725,7 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg) } EXPORT_SYMBOL_GPL(vfio_external_check_extension); -/** +/* * Sub-module support */ /* @@ -2272,7 +2272,7 @@ struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group) } EXPORT_SYMBOL_GPL(vfio_group_iommu_domain); -/** +/* * Module/class support */ static char *vfio_devnode(struct device *dev, umode_t *mode) From 8704e89349080bd640d1755c46d8cdc359a89748 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 25 Nov 2021 13:13:28 +0800 Subject: [PATCH 071/253] vfio/pci: Fix OpRegion read This is to fix incorrect pointer arithmetic which caused wrong OpRegion version returned, then VM driver got error to get wanted VBT block. We need to be safe to return correct data, so force pointer type for byte access. Fixes: 49ba1a2976c8 ("vfio/pci: Add OpRegion 2.0+ Extended VBT support.") Cc: Colin Xu Cc: Alex Williamson Cc: Dmitry Torokhov Cc: "Xu, Terrence" Cc: "Gao, Fred" Acked-by: Colin Xu Signed-off-by: Zhenyu Wang Link: https://lore.kernel.org/r/20211125051328.3359902-1-zhenyuw@linux.intel.com [aw: line wrap] Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_igd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 56cd551e0e04..362f91ec8845 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -98,7 +98,8 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, version = cpu_to_le16(0x0201); if (igd_opregion_shift_copy(buf, &off, - &version + (pos - OPREGION_VERSION), + (u8 *)&version + + (pos - OPREGION_VERSION), &pos, &remaining, bytes)) return -EFAULT; } @@ -121,7 +122,7 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, OPREGION_SIZE : 0); if (igd_opregion_shift_copy(buf, &off, - &rvda + (pos - OPREGION_RVDA), + (u8 *)&rvda + (pos - OPREGION_RVDA), &pos, &remaining, bytes)) return -EFAULT; } From 1d7c29b77725d05faff6754d2f5e7c147aedcf93 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 26 Nov 2021 22:35:45 +0100 Subject: [PATCH 072/253] parisc: Fix KBUILD_IMAGE for self-extracting kernel Default KBUILD_IMAGE to $(boot)/bzImage if a self-extracting (CONFIG_PARISC_SELF_EXTRACT=y) kernel is to be built. This fixes the bindeb-pkg make target. Signed-off-by: Helge Deller Cc: # v4.14+ --- arch/parisc/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 8db4af4879d0..82d77f4b0d08 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -15,7 +15,12 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # +ifdef CONFIG_PARISC_SELF_EXTRACT +boot := arch/parisc/boot +KBUILD_IMAGE := $(boot)/bzImage +else KBUILD_IMAGE := vmlinuz +endif NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 From 7e8aeb9d466e1b14d0580b12a0b9f7c702c42f79 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 27 Nov 2021 12:00:36 +0100 Subject: [PATCH 073/253] parisc: Enable sata sil, audit and usb support on 64-bit defconfig Add some more config options which reflect what's needed to boot our 64-bit debian buildds out of the box. Signed-off-by: Helge Deller --- arch/parisc/configs/generic-64bit_defconfig | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index d2daeac2b217..1b8fd80cbe7f 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -1,7 +1,9 @@ CONFIG_LOCALVERSION="-64bit" # CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_LZ4=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -35,6 +37,7 @@ CONFIG_MODVERSIONS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set +CONFIG_MEMORY_FAILURE=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -65,12 +68,15 @@ CONFIG_SCSI_ISCSI_ATTRS=y CONFIG_SCSI_SRP_ATTRS=y CONFIG_ISCSI_BOOT_SYSFS=y CONFIG_SCSI_MPT2SAS=y -CONFIG_SCSI_LASI700=m +CONFIG_SCSI_LASI700=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_ZALON=y CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_DH=y CONFIG_ATA=y +CONFIG_SATA_SIL=y +CONFIG_SATA_SIS=y +CONFIG_SATA_VIA=y CONFIG_PATA_NS87415=y CONFIG_PATA_SIL680=y CONFIG_ATA_GENERIC=y @@ -79,6 +85,7 @@ CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m CONFIG_DM_RAID=m CONFIG_DM_UEVENT=y +CONFIG_DM_AUDIT=y CONFIG_FUSION=y CONFIG_FUSION_SPI=y CONFIG_FUSION_SAS=y @@ -196,10 +203,15 @@ CONFIG_FB_MATROX_G=y CONFIG_FB_MATROX_I2C=y CONFIG_FB_MATROX_MAVEN=y CONFIG_FB_RADEON=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_HIDRAW=y CONFIG_HID_PID=y CONFIG_USB_HIDDEV=y CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=m CONFIG_UIO_AEC=m From 8d88382b7436551a9ebb78475c546b670790cbf6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 26 Nov 2021 16:45:59 +0100 Subject: [PATCH 074/253] parisc/agp: Annotate parisc agp init functions with __init Signed-off-by: Helge Deller Reported-by: kernel test robot --- drivers/char/agp/parisc-agp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index ed3c4c42fc23..d68d05d5d383 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -281,7 +281,7 @@ agp_ioc_init(void __iomem *ioc_regs) return 0; } -static int +static int __init lba_find_capability(int cap) { struct _parisc_agp_info *info = &parisc_agp_info; @@ -366,7 +366,7 @@ fail: return error; } -static int +static int __init find_quicksilver(struct device *dev, void *data) { struct parisc_device **lba = data; @@ -378,7 +378,7 @@ find_quicksilver(struct device *dev, void *data) return 0; } -static int +static int __init parisc_agp_init(void) { extern struct sba_device *sba_list; From 02fe0fbd8a21e183687925c3a266ae27dda9840f Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Fri, 24 Sep 2021 13:15:27 +0200 Subject: [PATCH 075/253] i2c: rk3x: Handle a spurious start completion interrupt flag In a typical read transfer, start completion flag is being set after read finishes (notice ipd bit 4 being set): trasnfer poll=0 i2c start rk3x-i2c fdd40000.i2c: IRQ: state 1, ipd: 10 i2c read rk3x-i2c fdd40000.i2c: IRQ: state 2, ipd: 1b i2c stop rk3x-i2c fdd40000.i2c: IRQ: state 4, ipd: 33 This causes I2C transfer being aborted in polled mode from a stop completion handler: trasnfer poll=1 i2c start rk3x-i2c fdd40000.i2c: IRQ: state 1, ipd: 10 i2c read rk3x-i2c fdd40000.i2c: IRQ: state 2, ipd: 0 rk3x-i2c fdd40000.i2c: IRQ: state 2, ipd: 1b i2c stop rk3x-i2c fdd40000.i2c: IRQ: state 4, ipd: 13 i2c stop rk3x-i2c fdd40000.i2c: unexpected irq in STOP: 0x10 Clearing the START flag after read fixes the issue without any obvious side effects. This issue was dicovered on RK3566 when adding support for powering off the RK817 PMIC. Signed-off-by: Ondrej Jirman Reviewed-by: John Keeping Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rk3x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 819ab4ee517e..02ddb237f69a 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -423,8 +423,8 @@ static void rk3x_i2c_handle_read(struct rk3x_i2c *i2c, unsigned int ipd) if (!(ipd & REG_INT_MBRF)) return; - /* ack interrupt */ - i2c_writel(i2c, REG_INT_MBRF, REG_IPD); + /* ack interrupt (read also produces a spurious START flag, clear it too) */ + i2c_writel(i2c, REG_INT_MBRF | REG_INT_START, REG_IPD); /* Can only handle a maximum of 32 bytes at a time */ if (len > 32) From 7d697f0d5737768fa1039b8953b67c08d8d406d1 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 19 Nov 2021 09:08:32 -0800 Subject: [PATCH 076/253] x86/cpu: Drop spurious underscore from RAPTOR_LAKE #define Convention for all the other "lake" CPUs is all one word. So s/RAPTOR_LAKE/RAPTORLAKE/ Fixes: fbdb5e8f2926 ("x86/cpu: Add Raptor Lake to Intel family") Reported-by: Rui Zhang Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lkml.kernel.org/r/20211119170832.1034220-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5a0bcf8b78d7..048b6d5aff50 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -108,7 +108,7 @@ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ -#define INTEL_FAM6_RAPTOR_LAKE 0xB7 +#define INTEL_FAM6_RAPTORLAKE 0xB7 /* "Small Core" Processors (Atom) */ From b43c2793f5e9910862e8fe07846b74e45b104501 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 26 Nov 2021 13:04:03 +0100 Subject: [PATCH 077/253] netfilter: nfnetlink_queue: silence bogus compiler warning net/netfilter/nfnetlink_queue.c:601:36: warning: variable 'ctinfo' is uninitialized when used here [-Wuninitialized] if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) ctinfo is only uninitialized if ct == NULL. Init it to 0 to silence this. Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 4acc4b8e9fe5..5837e8efc9c2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *indev; struct net_device *outdev; struct nf_conn *ct = NULL; - enum ip_conntrack_info ctinfo; + enum ip_conntrack_info ctinfo = 0; struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; From 52d0b8b18776f184c53632c5e0068201491cdb61 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 26 Nov 2021 13:47:46 +0100 Subject: [PATCH 078/253] x86/fpu/signal: Initialize sw_bytes in save_xstate_epilog() save_sw_bytes() did not fully initialize sw_bytes, which caused KMSAN to report an infoleak (see below). Initialize sw_bytes explicitly to avoid this. KMSAN report follows: ===================================================== BUG: KMSAN: kernel-infoleak in instrument_copy_to_user ./include/linux/instrumented.h:121 BUG: KMSAN: kernel-infoleak in __copy_to_user ./include/linux/uaccess.h:154 BUG: KMSAN: kernel-infoleak in save_xstate_epilog+0x2df/0x510 arch/x86/kernel/fpu/signal.c:127 instrument_copy_to_user ./include/linux/instrumented.h:121 __copy_to_user ./include/linux/uaccess.h:154 save_xstate_epilog+0x2df/0x510 arch/x86/kernel/fpu/signal.c:127 copy_fpstate_to_sigframe+0x861/0xb60 arch/x86/kernel/fpu/signal.c:245 get_sigframe+0x656/0x7e0 arch/x86/kernel/signal.c:296 __setup_rt_frame+0x14d/0x2a60 arch/x86/kernel/signal.c:471 setup_rt_frame arch/x86/kernel/signal.c:781 handle_signal arch/x86/kernel/signal.c:825 arch_do_signal_or_restart+0x417/0xdd0 arch/x86/kernel/signal.c:870 handle_signal_work kernel/entry/common.c:149 exit_to_user_mode_loop+0x1f6/0x490 kernel/entry/common.c:173 exit_to_user_mode_prepare kernel/entry/common.c:208 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 syscall_exit_to_user_mode+0x7e/0xc0 kernel/entry/common.c:302 do_syscall_64+0x60/0xd0 arch/x86/entry/common.c:88 entry_SYSCALL_64_after_hwframe+0x44/0xae ??:? Local variable sw_bytes created at: save_xstate_epilog+0x80/0x510 arch/x86/kernel/fpu/signal.c:121 copy_fpstate_to_sigframe+0x861/0xb60 arch/x86/kernel/fpu/signal.c:245 Bytes 20-47 of 48 are uninitialized Memory access of size 48 starts at ffff8880801d3a18 Data copied to user address 00007ffd90e2ef50 ===================================================== Link: https://lore.kernel.org/all/CAG_fn=V9T6OKPonSjsi9PmWB0hMHFC=yawozdft8i1-MSxrv=w@mail.gmail.com/ Fixes: 53599b4d54b9b8dd ("x86/fpu/signal: Prepare for variable sigframe length") Reported-by: Alexander Potapenko Signed-off-by: Marco Elver Signed-off-by: Alexander Potapenko Signed-off-by: Dave Hansen Tested-by: Alexander Potapenko Link: https://lkml.kernel.org/r/20211126124746.761278-1-glider@google.com --- arch/x86/kernel/fpu/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index d5958278eba6..91d4b6de58ab 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -118,7 +118,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, struct fpstate *fpstate) { struct xregs_state __user *x = buf; - struct _fpx_sw_bytes sw_bytes; + struct _fpx_sw_bytes sw_bytes = {}; u32 xfeatures; int err; From 679d94cd7d900871e5bc9cf780bd5b73af35ab42 Mon Sep 17 00:00:00 2001 From: Guangming Date: Fri, 26 Nov 2021 15:49:04 +0800 Subject: [PATCH 079/253] dma-buf: system_heap: Use 'for_each_sgtable_sg' in pages free flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For previous version, it uses 'sg_table.nent's to traverse sg_table in pages free flow. However, 'sg_table.nents' is reassigned in 'dma_map_sg', it means the number of created entries in the DMA adderess space. So, use 'sg_table.nents' in pages free flow will case some pages can't be freed. Here we should use sg_table.orig_nents to free pages memory, but use the sgtable helper 'for each_sgtable_sg'(, instead of the previous rather common helper 'for_each_sg' which maybe cause memory leak) is much better. Fixes: d963ab0f15fb0 ("dma-buf: system_heap: Allocate higher order pages if available") Signed-off-by: Guangming Reviewed-by: Robin Murphy Cc: # 5.11.* Reviewed-by: Christian König Reviewed-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20211126074904.88388-1-guangming.cao@mediatek.com --- drivers/dma-buf/heaps/system_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index f57a39ddd063..ab7fd896d2c4 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -290,7 +290,7 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf) int i; table = &buffer->sg_table; - for_each_sg(table->sgl, sg, table->nents, i) { + for_each_sgtable_sg(table, sg, i) { struct page *page = sg_page(sg); __free_pages(page, compound_order(page)); From a44f42ba7f1ad7d3c17bc7d91013fe814a53c5dc Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 30 Nov 2021 16:29:09 -0500 Subject: [PATCH 080/253] drm/i915/dp: Perform 30ms delay after source OUI write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While working on supporting the Intel HDR backlight interface, I noticed that there's a couple of laptops that will very rarely manage to boot up without detecting Intel HDR backlight support - even though it's supported on the system. One example of such a laptop is the Lenovo P17 1st generation. Following some investigation Ville Syrjälä did through the docs they have available to them, they discovered that there's actually supposed to be a 30ms wait after writing the source OUI before we begin setting up the rest of the backlight interface. This seems to be correct, as adding this 30ms delay seems to have completely fixed the probing issues I was previously seeing. So - let's start performing a 30ms wait after writing the OUI, which we do in a manner similar to how we keep track of PPS delays (e.g. record the timestamp of the OUI write, and then wait for however many ms are left since that timestamp right before we interact with the backlight) in order to avoid waiting any longer then we need to. As well, this also avoids us performing this delay on systems where we don't end up using the HDR backlight interface. V3: * Move last_oui_write into intel_dp V2: * Move panel delays into intel_pps Signed-off-by: Lyude Paul Reviewed-by: Jani Nikula Fixes: 4a8d79901d5b ("drm/i915/dp: Enable Intel's HDR backlight interface (only SDR for now)") Cc: Ville Syrjälä Cc: # v5.12+ Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20211130212912.212044-1-lyude@redhat.com (cherry picked from commit c7c90b0b8418a97d3aa8b39aae1992908948efad) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 11 +++++++++++ drivers/gpu/drm/i915/display/intel_dp.h | 2 ++ drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c | 5 +++++ 4 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 39e11eaec1a3..aa7238245b0e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1640,6 +1640,9 @@ struct intel_dp { struct intel_dp_pcon_frl frl; struct intel_psr psr; + + /* When we last wrote the OUI for eDP */ + unsigned long last_oui_write; }; enum lspcon_vendor { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index be883469d2fc..a552f05a67e5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -1955,6 +1956,16 @@ intel_edp_init_source_oui(struct intel_dp *intel_dp, bool careful) if (drm_dp_dpcd_write(&intel_dp->aux, DP_SOURCE_OUI, oui, sizeof(oui)) < 0) drm_err(&i915->drm, "Failed to write source OUI\n"); + + intel_dp->last_oui_write = jiffies; +} + +void intel_dp_wait_source_oui(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + drm_dbg_kms(&i915->drm, "Performing OUI wait\n"); + wait_remaining_ms_from_jiffies(intel_dp->last_oui_write, 30); } /* If the device supports it, try to set the power state appropriately */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index ce229026dc91..b64145a3869a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -119,4 +119,6 @@ void intel_dp_pcon_dsc_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_dp_phy_test(struct intel_encoder *encoder); +void intel_dp_wait_source_oui(struct intel_dp *intel_dp); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index f05b71c01b8e..3897468140e0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -36,6 +36,7 @@ #include "intel_backlight.h" #include "intel_display_types.h" +#include "intel_dp.h" #include "intel_dp_aux_backlight.h" /* TODO: @@ -106,6 +107,8 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) int ret; u8 tcon_cap[4]; + intel_dp_wait_source_oui(intel_dp); + ret = drm_dp_dpcd_read(aux, INTEL_EDP_HDR_TCON_CAP0, tcon_cap, sizeof(tcon_cap)); if (ret != sizeof(tcon_cap)) return false; @@ -204,6 +207,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, int ret; u8 old_ctrl, ctrl; + intel_dp_wait_source_oui(intel_dp); + ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl); if (ret != 1) { drm_err(&i915->drm, "Failed to read current backlight control mode: %d\n", ret); From 2c1b5a84669d2477d8fffe9136e86a2cff591729 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 29 Nov 2021 16:02:48 +0800 Subject: [PATCH 081/253] cpufreq: Fix get_cpu_device() failure in add_cpu_dev_symlink() When I hot added a CPU, I found 'cpufreq' directory was not created below /sys/devices/system/cpu/cpuX/. It is because get_cpu_device() failed in add_cpu_dev_symlink(). cpufreq_add_dev() is the .add_dev callback of a CPU subsys interface. It will be called when the CPU device registered into the system. The call chain is as follows: register_cpu() ->device_register() ->device_add() ->bus_probe_device() ->cpufreq_add_dev() But only after the CPU device has been registered, we can get the CPU device by get_cpu_device(), otherwise it will return NULL. Since we already have the CPU device in cpufreq_add_dev(), pass it to add_cpu_dev_symlink(). I noticed that the 'kobj' of the CPU device has been added into the system before cpufreq_add_dev(). Fixes: 2f0ba790df51 ("cpufreq: Fix creation of symbolic links to policy directories") Signed-off-by: Xiongfeng Wang Acked-by: Viresh Kumar Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e338d2f010fe..22aa2793e4d2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1004,10 +1004,9 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) +static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu, + struct device *dev) { - struct device *dev = get_cpu_device(cpu); - if (unlikely(!dev)) return; @@ -1391,7 +1390,7 @@ static int cpufreq_online(unsigned int cpu) if (new_policy) { for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; - add_cpu_dev_symlink(policy, j); + add_cpu_dev_symlink(policy, j, get_cpu_device(j)); } policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req), @@ -1565,7 +1564,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); if (policy) - add_cpu_dev_symlink(policy, cpu); + add_cpu_dev_symlink(policy, cpu, dev); return 0; } From f751db8adaeaa4a5b200121973633ea45f4bb395 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 27 Nov 2021 00:08:20 +0100 Subject: [PATCH 082/253] powercap/drivers/dtpm: Disable DTPM at boot time The DTPM framework misses a mechanism to set it up. That is currently under review but will come after the next cycle. As the distro are enabling all the kernel options, the DTPM framework is enabled on platforms where the energy model is not implemented, thus making the framework inconsistent and disrupting the CPU frequency scaling service. Remove the initialization at boot time as a hot fix. Fixes: 7a89d7eacf8e ("powercap/drivers/dtpm: Simplify the dtpm table") Signed-off-by: Daniel Lezcano Reported-By: Doug Smythies Tested-By: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/powercap/dtpm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index b9fac786246a..fb35c5828bfb 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -471,9 +471,6 @@ static int __init init_dtpm(void) return PTR_ERR(pct); } - for_each_dtpm_table(dtpm_descr) - dtpm_descr->init(); - return 0; } late_initcall(init_dtpm); From 1e81d3e06de24aedac5c27df13cfa40d912b221b Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Wed, 1 Dec 2021 15:40:20 +0800 Subject: [PATCH 083/253] cpufreq: Fix a comment in cpufreq_policy_free Make the comment in blocking_notifier_call_chain() easier to understand. Signed-off-by: Tang Yizhou Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 22aa2793e4d2..096c3848fa41 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1295,8 +1295,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) if (policy->max_freq_req) { /* - * CPUFREQ_CREATE_POLICY notification is sent only after - * successfully adding max_freq_req request. + * Remove max_freq_req after sending CPUFREQ_REMOVE_POLICY + * notification, since CPUFREQ_CREATE_POLICY notification was + * sent after adding max_freq_req earlier. */ blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_REMOVE_POLICY, policy); From a15b8cd77512fd24920f3a6fe1c85f896fb775e8 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Wed, 1 Dec 2021 15:40:21 +0800 Subject: [PATCH 084/253] cpufreq: docs: Update core.rst As the definition of struct cpufreq_freqs has changed, update core.rst with the new first member of struct cpufreq_freqs. Signed-off-by: Tang Yizhou Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/core.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/cpu-freq/core.rst b/Documentation/cpu-freq/core.rst index 33cb90bd1d8f..4ceef8e7217c 100644 --- a/Documentation/cpu-freq/core.rst +++ b/Documentation/cpu-freq/core.rst @@ -73,12 +73,12 @@ CPUFREQ_POSTCHANGE. The third argument is a struct cpufreq_freqs with the following values: -===== =========================== -cpu number of the affected CPU +====== ====================================== +policy a pointer to the struct cpufreq_policy old old frequency new new frequency flags flags of the cpufreq driver -===== =========================== +====== ====================================== 3. CPUFreq Table Generation with Operating Performance Point (OPP) ================================================================== From 7e4dcc13965c57869684d57a1dc6dd7be589488c Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 4 Jun 2021 09:53:28 -0700 Subject: [PATCH 085/253] iavf: restore MSI state on reset If the PF experiences an FLR, the VF's MSI and MSI-X configuration will be conveniently and silently removed in the process. When this happens, reset recovery will appear to complete normally but no traffic will pass. The netdev watchdog will helpfully notify everyone of this issue. To prevent such public embarrassment, restore MSI configuration at every reset. For normal resets, this will do no harm, but for VF resets resulting from a PF FLR, this will keep the VF working. Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Mitch Williams Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 14934a7a13ef..cfdbf8c08d18 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2248,6 +2248,7 @@ static void iavf_reset_task(struct work_struct *work) } pci_set_master(adapter->pdev); + pci_restore_msi_state(adapter->pdev); if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", From da3b36a23bb72e9742bf2f1b3e5da9615480c789 Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Tue, 23 Nov 2021 19:19:40 +0800 Subject: [PATCH 086/253] drm/amdgpu/sriov/vcn: add new vcn ip revision check case for SIENNA_CICHLID [WHY] for sriov odd# vf will modify vcn0 engine ip revision(due to multimedia bandwidth feature), which will be mismatched with original vcn0 revision [HOW] add new version check for vcn0 disabled revision(3, 0, 192), typically modified under sriov mode Signed-off-by: Jane Jian Reviewed-by: Guchun Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 1 + drivers/gpu/drm/amd/amdgpu/nv.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 503995c7ff6c..f6fae79203ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -918,6 +918,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 64): case IP_VERSION(3, 1, 1): case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 192): amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 4f7c70845785..585961c2f5f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -135,6 +135,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): + case IP_VERSION(3, 0, 192): if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) fw_name = FIRMWARE_SIENNA_CICHLID; else diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a6659d9ecdd2..2ec1ffb36b1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -183,6 +183,7 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, switch (adev->ip_versions[UVD_HWIP][0]) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): + case IP_VERSION(3, 0, 192): if (amdgpu_sriov_vf(adev)) { if (encode) *codecs = &sriov_sc_video_codecs_encode; From 3e467e478ed3a9701bb588d648d6e0ccb82ced09 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Wed, 24 Nov 2021 10:33:38 +0800 Subject: [PATCH 087/253] drm/amdgpu: cancel the correct hrtimer on exit Signed-off-by: Flora Cui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index ce982afeff91..ac9a8cd21c4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -504,8 +504,8 @@ static int amdgpu_vkms_sw_fini(void *handle) int i = 0; for (i = 0; i < adev->mode_info.num_crtc; i++) - if (adev->mode_info.crtcs[i]) - hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); + if (adev->amdgpu_vkms_output[i].vblank_hrtimer.function) + hrtimer_cancel(&adev->amdgpu_vkms_output[i].vblank_hrtimer); kfree(adev->mode_info.bios_hardcoded_edid); kfree(adev->amdgpu_vkms_output); From 1053b9c948e614473819a1a5bcaff6d44e680dcf Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Thu, 18 Nov 2021 16:25:19 +0800 Subject: [PATCH 088/253] drm/amdgpu: check atomic flag to differeniate with legacy path since vkms support atomic KMS interface Signed-off-by: Flora Cui Reviewed-by: Guchun Chen Acked-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d94fa748e6bb..4b7a69ef721f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3833,7 +3833,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ - if (!amdgpu_device_has_dc_support(adev)) + if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) drm_helper_force_disable_all(adev_to_drm(adev)); else drm_atomic_helper_shutdown(adev_to_drm(adev)); @@ -5129,7 +5129,7 @@ skip_hw_reset: drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); } - if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) { + if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } From 7551f70ab93d0f3371b28e996f7583e3be1d9a72 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 26 Nov 2021 13:06:15 +0800 Subject: [PATCH 089/253] drm/amdgpu: fix the missed handling for SDMA2 and SDMA3 There is no base reg offset or ip_version set for SDMA2 and SDMA3 on SIENNA_CICHLID, so add them. Signed-off-by: Guchun Chen Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index f6fae79203ee..ea00090b3fb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -157,6 +157,8 @@ static int hw_id_map[MAX_HWIP] = { [HDP_HWIP] = HDP_HWID, [SDMA0_HWIP] = SDMA0_HWID, [SDMA1_HWIP] = SDMA1_HWID, + [SDMA2_HWIP] = SDMA2_HWID, + [SDMA3_HWIP] = SDMA3_HWID, [MMHUB_HWIP] = MMHUB_HWID, [ATHUB_HWIP] = ATHUB_HWID, [NBIO_HWIP] = NBIF_HWID, From e0570f0b6e2e88be7ef99d1194b153cb054a2107 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 29 Nov 2021 15:57:44 +0800 Subject: [PATCH 090/253] drm/amdgpu: Don't halt RLC on GFX suspend On aldebaran, RLC also controls GFXCLK. Skip halting RLC during GFX IP suspend and keep it running till PMFW disables all DPMs. [ 578.019986] amdgpu 0000:23:00.0: amdgpu: GPU reset begin! [ 583.245566] amdgpu 0000:23:00.0: amdgpu: Failed to disable smu features. [ 583.245621] amdgpu 0000:23:00.0: amdgpu: Fail to disable dpm features! [ 583.245639] [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -62 [ 583.248504] [drm] free PSP TMR buffer Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++++--- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 34478bcc4d09..b305fd39874f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4060,9 +4060,10 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); - /* Skip suspend with A+A reset */ - if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { - dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); + /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ + if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) { + dev_dbg(adev->dev, "Skipping RLC halt\n"); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 01168b8955bf..8a3244585d80 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1468,7 +1468,7 @@ static int smu_disable_dpms(struct smu_context *smu) dev_err(adev->dev, "Failed to disable smu features.\n"); } - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0) && + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); From 94ebc035456a4ccacfbbef60c444079a256623ad Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 12 Nov 2021 15:27:32 -0500 Subject: [PATCH 091/253] drm/amd/display: Allow DSC on supported MST branch devices [Why] When trying to lightup two 4k60 non-DSC displays behind a branch device that supports DSC we can't lightup both at once due to bandwidth limitations - each requires 48 VCPI slots but we only have 63. [How] The workaround already exists in the code but is guarded by a CONFIG that cannot be set by the user and shouldn't need to be. Check for specific branch device IDs to device whether to enable the workaround for multiple display scenarios. Reviewed-by: Hersen Wu Acked-by: Bhawanpreet Lakha Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 32a5ce09a62a..cc34a35d0bcb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,6 +36,8 @@ #include "dm_helpers.h" #include "dc_link_ddc.h" +#include "ddc_service_types.h" +#include "dpcd_defs.h" #include "i2caux_interface.h" #include "dmub_cmd.h" @@ -157,6 +159,16 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { }; #if defined(CONFIG_DRM_AMD_DC_DCN) +static bool needs_dsc_aux_workaround(struct dc_link *link) +{ + if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + (link->dpcd_caps.dpcd_rev.raw == DPCD_REV_14 || link->dpcd_caps.dpcd_rev.raw == DPCD_REV_12) && + link->dpcd_caps.sink_count.bits.SINK_COUNT >= 2) + return true; + + return false; +} + static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector) { struct dc_sink *dc_sink = aconnector->dc_sink; @@ -166,7 +178,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto u8 *dsc_branch_dec_caps = NULL; aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port); -#if defined(CONFIG_HP_HOOK_WORKAROUND) + /* * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs * because it only check the dsc/fec caps of the "port variable" and not the dock @@ -176,10 +188,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto * Workaround: explicitly check the use case above and use the mst dock's aux as dsc_aux * */ - - if (!aconnector->dsc_aux && !port->parent->port_parent) + if (!aconnector->dsc_aux && !port->parent->port_parent && + needs_dsc_aux_workaround(aconnector->dc_link)) aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux; -#endif + if (!aconnector->dsc_aux) return false; From ef548afe05f8d8c5af0fc44b035d5283156f8b03 Mon Sep 17 00:00:00 2001 From: "Shen, George" Date: Mon, 15 Nov 2021 22:38:18 -0500 Subject: [PATCH 092/253] drm/amd/display: Clear DPCD lane settings after repeater training [Why] VS and PE requested by repeater should not persist for the sink. [How] Clear DPCD lane settings after repeater link training finishes. Reviewed-by: Wesley Chalmers Acked-by: Bhawanpreet Lakha Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index cb7bf9148904..13bc69d6b679 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2138,7 +2138,7 @@ static enum link_training_result dp_perform_8b_10b_link_training( } for (lane = 0; lane < (uint8_t)lt_settings->link_settings.lane_count; lane++) - lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET = VOLTAGE_SWING_LEVEL0; + lt_settings->dpcd_lane_settings[lane].raw = 0; } if (status == LINK_TRAINING_SUCCESS) { From 5ceaebcda9061c04f439c93961f0819878365c0f Mon Sep 17 00:00:00 2001 From: Mustapha Ghaddar Date: Mon, 15 Nov 2021 17:56:42 -0500 Subject: [PATCH 093/253] drm/amd/display: Fix for the no Audio bug with Tiled Displays [WHY] It seems like after a series of plug/unplugs we end up in a situation where tiled display doesnt support Audio. [HOW] The issue seems to be related to when we check streams changed after an HPD, we should be checking the audio_struct as well to see if any of its values changed. Reviewed-by: Jun Lei Acked-by: Bhawanpreet Lakha Signed-off-by: Mustapha Ghaddar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index c32fdccd4d92..dcbfae210493 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1664,6 +1664,10 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; + // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks + if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + return false; + return true; } From c9beecc5c9626ab772160ab3f8e209abc09fa54d Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Sun, 14 Nov 2021 21:48:02 -0500 Subject: [PATCH 094/253] drm/amd/display: Add work around for tunneled MST. [Why] Certain USB4 docks do not seem to be able to handle disabling DSC once it has been enabled on an MST stream. This can result in blank displays. [How] As a work around, always enable DSC on docks exhibiting this issue. The flag to indicate the use of DSC for MST streams on a USB4 dock is set during detection of the dock and only cleared when the USB4 dock is disconnected. Reviewed-by: Jun Lei Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Jimmy Kizito Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 16 +++++++++++++++ .../gpu/drm/amd/display/dc/core/dc_resource.c | 20 +++++++++---------- drivers/gpu/drm/amd/display/dc/dc.h | 3 ++- drivers/gpu/drm/amd/display/dc/dc_link.h | 2 ++ 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 60544788e911..c8457babfdea 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -758,6 +758,18 @@ static bool detect_dp(struct dc_link *link, dal_ddc_service_set_transaction_type(link->ddc, sink_caps->transaction_type); +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* Apply work around for tunneled MST on certain USB4 docks. Always use DSC if dock + * reports DSC support. + */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + link->type == dc_connection_mst_branch && + link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT && + !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around) + link->wa_flags.dpia_mst_dsc_always_on = true; +#endif + #if defined(CONFIG_DRM_AMD_DC_HDCP) /* In case of fallback to SST when topology discovery below fails * HDCP caps will be querried again later by the upper layer (caller @@ -1203,6 +1215,10 @@ static bool dc_link_detect_helper(struct dc_link *link, LINK_INFO("link=%d, mst branch is now Disconnected\n", link->link_index); + /* Disable work around which keeps DSC on for tunneled MST on certain USB4 docks. */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + link->wa_flags.dpia_mst_dsc_always_on = false; + dm_helpers_dp_mst_stop_top_mgr(link->ctx, link); link->mst_stream_alloc_table.stream_count = 0; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index dcbfae210493..e2d9a46d0e1a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2256,16 +2256,6 @@ enum dc_status dc_validate_global_state( if (!new_ctx) return DC_ERROR_UNEXPECTED; -#if defined(CONFIG_DRM_AMD_DC_DCN) - - /* - * Update link encoder to stream assignment. - * TODO: Split out reason allocation from validation. - */ - if (dc->res_pool->funcs->link_encs_assign && fast_validate == false) - dc->res_pool->funcs->link_encs_assign( - dc, new_ctx, new_ctx->streams, new_ctx->stream_count); -#endif if (dc->res_pool->funcs->validate_global) { result = dc->res_pool->funcs->validate_global(dc, new_ctx); @@ -2317,6 +2307,16 @@ enum dc_status dc_validate_global_state( if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate)) result = DC_FAIL_BANDWIDTH_VALIDATE; +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* + * Only update link encoder to stream assignment after bandwidth validation passed. + * TODO: Split out assignment and validation. + */ + if (result == DC_OK && dc->res_pool->funcs->link_encs_assign && fast_validate == false) + dc->res_pool->funcs->link_encs_assign( + dc, new_ctx, new_ctx->streams, new_ctx->stream_count); +#endif + return result; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3aac3f4a2852..618e7989176f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -508,7 +508,8 @@ union dpia_debug_options { uint32_t disable_dpia:1; uint32_t force_non_lttpr:1; uint32_t extend_aux_rd_interval:1; - uint32_t reserved:29; + uint32_t disable_mst_dsc_work_around:1; + uint32_t reserved:28; } bits; uint32_t raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 180ecd860296..b01077a6af0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -191,6 +191,8 @@ struct dc_link { bool dp_skip_DID2; bool dp_skip_reset_segment; bool dp_mot_reset_segment; + /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ + bool dpia_mst_dsc_always_on; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; From fc2c456ea8329053685db179d30e3ff0c91e5066 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 26 Nov 2021 12:42:50 -0500 Subject: [PATCH 095/253] drm/amdkfd: set "r = 0" explicitly before goto To silence the following Smatch static checker warning: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:2615 svm_range_restore_pages() warn: missing error code here? 'get_task_mm()' failed. 'r' = '0' Signed-off-by: Philip Yang Suggested-by: Dan Carpenter Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 58b89b53ebe6..dd4715cb7d42 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2614,6 +2614,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, if (atomic_read(&svms->drain_pagefaults)) { pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = 0; goto out; } @@ -2623,6 +2624,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mm = get_task_mm(p->lead_thread); if (!mm) { pr_debug("svms 0x%p failed to get mm\n", svms); + r = 0; goto out; } @@ -2660,6 +2662,7 @@ retry_write_locked: if (svm_range_skip_recover(prange)) { amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + r = 0; goto out_unlock_range; } @@ -2668,6 +2671,7 @@ retry_write_locked: if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { pr_debug("svms 0x%p [0x%lx %lx] already restored\n", svms, prange->start, prange->last); + r = 0; goto out_unlock_range; } From 494f2e42ce4a9ddffb5d8c5b2db816425ef90397 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 26 Nov 2021 18:43:09 -0500 Subject: [PATCH 096/253] drm/amdkfd: fix double free mem structure drm_gem_object_put calls release_notify callback to free the mem structure and unreserve_mem_limit, move it down after the last access of mem and make it conditional call. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 71a6a9ef54ac..6348559608ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1396,7 +1396,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; - struct drm_gem_object *gobj; + struct drm_gem_object *gobj = NULL; u32 domain, alloc_domain; u64 alloc_flags; int ret; @@ -1506,14 +1506,16 @@ allocate_init_user_pages_failed: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: - drm_gem_object_put(gobj); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: unreserve_mem_limit(adev, size, alloc_domain, !!sg); err_reserve_limit: mutex_destroy(&(*mem)->lock); - kfree(*mem); + if (gobj) + drm_gem_object_put(gobj); + else + kfree(*mem); err: if (sg) { sg_free_table(sg); From 2da34b7bb59e1caa9a336e0e20a76b8b6a4abea2 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Fri, 19 Nov 2021 04:27:55 -0500 Subject: [PATCH 097/253] drm/amd/display: add connector type check for CRC source set [Why] IGT bypass test will set crc source as DPRX,and display DM didn`t check connection type, it run the test on the HDMI connector ,then the kernel will be crashed because aux->transfer is set null for HDMI connection. This patch will skip the invalid connection test and fix kernel crash issue. [How] Check the connector type while setting the pipe crc source as DPRX or auto,if the type is not DP or eDP, the crtc crc source will not be set and report error code to IGT test,IGT will show the this subtest as no valid crtc/connector combinations found. 116.779714] [IGT] amd_bypass: starting subtest 8bpc-bypass-mode [ 117.730996] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 117.731001] #PF: supervisor instruction fetch in kernel mode [ 117.731003] #PF: error_code(0x0010) - not-present page [ 117.731004] PGD 0 P4D 0 [ 117.731006] Oops: 0010 [#1] SMP NOPTI [ 117.731009] CPU: 11 PID: 2428 Comm: amd_bypass Tainted: G OE 5.11.0-34-generic #36~20.04.1-Ubuntu [ 117.731011] Hardware name: AMD CZN/, BIOS AB.FD 09/07/2021 [ 117.731012] RIP: 0010:0x0 [ 117.731015] Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. [ 117.731016] RSP: 0018:ffffa8d64225bab8 EFLAGS: 00010246 [ 117.731017] RAX: 0000000000000000 RBX: 0000000000000020 RCX: ffffa8d64225bb5e [ 117.731018] RDX: ffff93151d921880 RSI: ffffa8d64225bac8 RDI: ffff931511a1a9d8 [ 117.731022] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 117.731023] CR2: ffffffffffffffd6 CR3: 000000010d5a4000 CR4: 0000000000750ee0 [ 117.731023] PKRU: 55555554 [ 117.731024] Call Trace: [ 117.731027] drm_dp_dpcd_access+0x72/0x110 [drm_kms_helper] [ 117.731036] drm_dp_dpcd_read+0xb7/0xf0 [drm_kms_helper] [ 117.731040] drm_dp_start_crc+0x38/0xb0 [drm_kms_helper] [ 117.731047] amdgpu_dm_crtc_set_crc_source+0x1ae/0x3e0 [amdgpu] [ 117.731149] crtc_crc_open+0x174/0x220 [drm] [ 117.731162] full_proxy_open+0x168/0x1f0 [ 117.731165] ? open_proxy_open+0x100/0x100 BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1546 Reviewed-by: Harry Wentland Reviewed-by: Rodrigo Siqueira Signed-off-by: Perry Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index cce062adc439..8a441a22c46e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -314,6 +314,14 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) ret = -EINVAL; goto cleanup; } + + if ((aconn->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) && + (aconn->base.connector_type != DRM_MODE_CONNECTOR_eDP)) { + DRM_DEBUG_DRIVER("No DP connector available for CRC source\n"); + ret = -EINVAL; + goto cleanup; + } + } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) From 428890a3fec131521cc59aac0d3c48bde9d76b7b Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 29 Nov 2021 21:29:05 -0500 Subject: [PATCH 098/253] drm/amdgpu: adjust the kfd reset sequence in reset sriov function This change revert previous commits: 9f4f2c1a3524 ("drm/amd/amdgpu: fix the kfd pre_reset sequence in sriov") 271fd38ce56d ("drm/amdgpu: move kfd post_reset out of reset_sriov function") This change moves the amdgpu_amdkfd_pre_reset to an earlier place in amdgpu_device_reset_sriov, presumably to address the sequence issue that the first patch was originally meant to fix. Some register access(GRBM_GFX_CNTL) only be allowed on full access mode. Move kfd_pre_reset and kfd_post_reset back inside reset_sriov function. Fixes: 9f4f2c1a3524 ("drm/amd/amdgpu: fix the kfd pre_reset sequence in sriov") Fixes: 271fd38ce56d ("drm/amdgpu: move kfd post_reset out of reset_sriov function") Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4b7a69ef721f..1e651b959141 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4289,6 +4289,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, { int r; + amdgpu_amdkfd_pre_reset(adev); + if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else @@ -4316,6 +4318,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); + amdgpu_amdkfd_post_reset(adev); error: if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { @@ -5030,7 +5033,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, cancel_delayed_work_sync(&tmp_adev->delayed_init_work); - amdgpu_amdkfd_pre_reset(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_pre_reset(tmp_adev); /* * Mark these ASICs to be reseted as untracked first @@ -5148,9 +5152,9 @@ skip_hw_reset: skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - /* unlock kfd */ - if (!need_emergency_restart) - amdgpu_amdkfd_post_reset(tmp_adev); + /* unlock kfd: SRIOV would do it separately */ + if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_post_reset(tmp_adev); /* kfd_post_reset will do nothing if kfd device is not initialized, * need to bring up kfd here if it's not be initialized before From 3abfe30d803e62cc75dec254eefab3b04d69219b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 29 Nov 2021 12:33:05 -0500 Subject: [PATCH 099/253] drm/amdkfd: process_info lock not needed for svm process_info->lock is used to protect kfd_bo_list, vm_list_head, n_vms and userptr valid/inval list, svm_range_restore_work and svm_range_set_attr don't access those, so do not need to take process_info lock. This will avoid potential circular locking issue. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index dd4715cb7d42..3cb4681c5f53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1574,7 +1574,6 @@ retry_flush_work: static void svm_range_restore_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct amdkfd_process_info *process_info; struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; @@ -1594,12 +1593,10 @@ static void svm_range_restore_work(struct work_struct *work) * the lifetime of this thread, kfd_process and mm will be valid. */ p = container_of(svms, struct kfd_process, svms); - process_info = p->kgd_process_info; mm = p->mm; if (!mm) return; - mutex_lock(&process_info->lock); svm_range_list_lock_and_flush_work(svms, mm); mutex_lock(&svms->lock); @@ -1652,7 +1649,6 @@ static void svm_range_restore_work(struct work_struct *work) out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); - mutex_unlock(&process_info->lock); /* If validation failed, reschedule another attempt */ if (evicted_ranges) { @@ -3181,7 +3177,6 @@ static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) { - struct amdkfd_process_info *process_info = p->kgd_process_info; struct mm_struct *mm = current->mm; struct list_head update_list; struct list_head insert_list; @@ -3200,8 +3195,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, svms = &p->svms; - mutex_lock(&process_info->lock); - svm_range_list_lock_and_flush_work(svms, mm); r = svm_range_is_valid(p, start, size); @@ -3277,8 +3270,6 @@ out_unlock_range: mutex_unlock(&svms->lock); mmap_read_unlock(mm); out: - mutex_unlock(&process_info->lock); - pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, &p->svms, start, start + size - 1, r); From c7719e79347803b8e3b6b50da8c6db410a3012b5 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 17 Nov 2021 10:37:50 +0800 Subject: [PATCH 100/253] x86/tsc: Add a timer to make sure TSC_adjust is always checked The TSC_ADJUST register is checked every time a CPU enters idle state, but Thomas Gleixner mentioned there is still a caveat that a system won't enter idle [1], either because it's too busy or configured purposely to not enter idle. Setup a periodic timer (every 10 minutes) to make sure the check is happening on a regular base. [1] https://lore.kernel.org/lkml/875z286xtk.fsf@nanos.tec.linutronix.de/ Fixes: 6e3cd95234dc ("x86/hpet: Use another crystalball to evaluate HPET usability") Requested-by: Thomas Gleixner Signed-off-by: Feng Tang Signed-off-by: Thomas Gleixner Cc: "Paul E. McKenney" Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211117023751.24190-1-feng.tang@intel.com --- arch/x86/kernel/tsc_sync.c | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 50a4515fe0ad..9452dc9664b5 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -30,6 +30,7 @@ struct tsc_adjust { }; static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +static struct timer_list tsc_sync_check_timer; /* * TSC's on different sockets may be reset asynchronously. @@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume) } } +/* + * Normally the tsc_sync will be checked every time system enters idle + * state, but there is still caveat that a system won't enter idle, + * either because it's too busy or configured purposely to not enter + * idle. + * + * So setup a periodic timer (every 10 minutes) to make sure the check + * is always on. + */ + +#define SYNC_CHECK_INTERVAL (HZ * 600) + +static void tsc_sync_check_timer_fn(struct timer_list *unused) +{ + int next_cpu; + + tsc_verify_tsc_adjust(false); + + /* Run the check for all onlined CPUs in turn */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + + tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL; + add_timer_on(&tsc_sync_check_timer, next_cpu); +} + +static int __init start_sync_check_timer(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable) + return 0; + + timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0); + tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL; + add_timer(&tsc_sync_check_timer); + + return 0; +} +late_initcall(start_sync_check_timer); + static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, unsigned int cpu, bool bootcpu) { From b50db7095fe002fa3e16605546cba66bf1b68a3e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 17 Nov 2021 10:37:51 +0800 Subject: [PATCH 101/253] x86/tsc: Disable clocksource watchdog for TSC on qualified platorms There are cases that the TSC clocksource is wrongly judged as unstable by the clocksource watchdog mechanism which tries to validate the TSC against HPET, PM_TIMER or jiffies. While there is hardly a general reliable way to check the validity of a watchdog, Thomas Gleixner proposed [1]: "I'm inclined to lift that requirement when the CPU has: 1) X86_FEATURE_CONSTANT_TSC 2) X86_FEATURE_NONSTOP_TSC 3) X86_FEATURE_NONSTOP_TSC_S3 4) X86_FEATURE_TSC_ADJUST 5) At max. 4 sockets After two decades of horrors we're finally at a point where TSC seems to be halfway reliable and less abused by BIOS tinkerers. TSC_ADJUST was really key as we can now detect even small modifications reliably and the important point is that we can cure them as well (not pretty but better than all other options)." As feature #3 X86_FEATURE_NONSTOP_TSC_S3 only exists on several generations of Atom processorz, and is always coupled with X86_FEATURE_CONSTANT_TSC and X86_FEATURE_NONSTOP_TSC, skip checking it, and also be more defensive to use maximal 2 sockets. The check is done inside tsc_init() before registering 'tsc-early' and 'tsc' clocksources, as there were cases that both of them had been wrongly judged as unreliable. For more background of tsc/watchdog, there is a good summary in [2] [tglx} Update vs. jiffies: On systems where the only remaining clocksource aside of TSC is jiffies there is no way to make this work because that creates a circular dependency. Jiffies accuracy depends on not missing a periodic timer interrupt, which is not guaranteed. That could be detected by TSC, but as TSC is not trusted this cannot be compensated. The consequence is a circulus vitiosus which results in shutting down TSC and falling back to the jiffies clocksource which is even more unreliable. [1]. https://lore.kernel.org/lkml/87eekfk8bd.fsf@nanos.tec.linutronix.de/ [2]. https://lore.kernel.org/lkml/87a6pimt1f.ffs@nanos.tec.linutronix.de/ [ tglx: Refine comment and amend changelog ] Fixes: 6e3cd95234dc ("x86/hpet: Use another crystalball to evaluate HPET usability") Suggested-by: Thomas Gleixner Signed-off-by: Feng Tang Signed-off-by: Thomas Gleixner Cc: "Paul E. McKenney" Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211117023751.24190-2-feng.tang@intel.com --- arch/x86/kernel/tsc.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2e076a459a0c..a698196377be 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1180,6 +1180,12 @@ void mark_tsc_unstable(char *reason) EXPORT_SYMBOL_GPL(mark_tsc_unstable); +static void __init tsc_disable_clocksource_watchdog(void) +{ + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; +} + static void __init check_system_tsc_reliable(void) { #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) @@ -1196,6 +1202,23 @@ static void __init check_system_tsc_reliable(void) #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; + + /* + * Disable the clocksource watchdog when the system has: + * - TSC running at constant frequency + * - TSC which does not stop in C-States + * - the TSC_ADJUST register which allows to detect even minimal + * modifications + * - not more than two sockets. As the number of sockets cannot be + * evaluated at the early boot stage where this has to be + * invoked, check the number of online memory nodes as a + * fallback solution which is an reasonable estimate. + */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + boot_cpu_has(X86_FEATURE_TSC_ADJUST) && + nr_online_nodes <= 2) + tsc_disable_clocksource_watchdog(); } /* @@ -1387,9 +1410,6 @@ static int __init init_tsc_clocksource(void) if (tsc_unstable) goto unreg; - if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; @@ -1527,7 +1547,7 @@ void __init tsc_init(void) } if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + tsc_disable_clocksource_watchdog(); clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); From e445976537ad139162980bee015b7364e5b64fff Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 1 Dec 2021 09:31:03 -0800 Subject: [PATCH 102/253] xfs: remove incorrect ASSERT in xfs_rename This ASSERT in xfs_rename is a) incorrect, because (RENAME_WHITEOUT|RENAME_NOREPLACE) is a valid combination, and b) unnecessary, because actual invalid flag combinations are already handled at the vfs level in do_renameat2() before we get called. So, remove it. Reported-by: Paolo Bonzini Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 64b9bf334806..6771f357ad2c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3122,7 +3122,6 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip); if (error) return error; From 06d5d558f5a30582546dcbe9327601af867ce1c9 Mon Sep 17 00:00:00 2001 From: Yang Guang Date: Tue, 30 Nov 2021 08:04:11 +0800 Subject: [PATCH 103/253] ata: replace snprintf in show functions with sysfs_emit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit coccinelle report: ./drivers/ata/libata-sata.c:830:8-16: WARNING: use scnprintf or sprintf Use sysfs_emit instead of scnprintf or sprintf makes more sense. Reported-by: Zeal Robot Signed-off-by: Yang Guang Signed-off-by: Damien Le Moal --- drivers/ata/libata-sata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 5b78e86e3459..b9c77885b872 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -827,7 +827,7 @@ static ssize_t ata_scsi_lpm_show(struct device *dev, if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names)) return -EINVAL; - return snprintf(buf, PAGE_SIZE, "%s\n", + return sysfs_emit(buf, "%s\n", ata_lpm_policy_names[ap->target_lpm_policy]); } DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR, From cb1d220da0faa5ca0deb93449aff953f0c2cce6d Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 18 Nov 2021 21:03:20 +0800 Subject: [PATCH 104/253] KVM: x86/pmu: Fix reserved bits for AMD PerfEvtSeln register If we run the following perf command in an AMD Milan guest: perf stat \ -e cpu/event=0x1d0/ \ -e cpu/event=0x1c7/ \ -e cpu/umask=0x1f,event=0x18e/ \ -e cpu/umask=0x7,event=0x18e/ \ -e cpu/umask=0x18,event=0x18e/ \ ./workload dmesg will report a #GP warning from an unchecked MSR access error on MSR_F15H_PERF_CTLx. This is because according to APM (Revision: 4.03) Figure 13-7, the bits [35:32] of AMD PerfEvtSeln register is a part of the event select encoding, which extends the EVENT_SELECT field from 8 bits to 12 bits. Opportunistically update pmu->reserved_bits for reserved bit 19. Reported-by: Jim Mattson Fixes: ca724305a2b0 ("KVM: x86/vPMU: Implement AMD vPMU code for KVM") Signed-off-by: Like Xu Message-Id: <20211118130320.95997-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 871c426ec389..b4095dfeeee6 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -281,7 +281,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; - pmu->reserved_bits = 0xffffffff00200000ull; + pmu->reserved_bits = 0xfffffff000280000ull; pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; From ef8b4b7203682cc9adb37c8336d3f0f3b80bc382 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Nov 2021 07:37:45 -0500 Subject: [PATCH 105/253] KVM: ensure APICv is considered inactive if there is no APIC kvm_vcpu_apicv_active() returns false if a virtual machine has no in-kernel local APIC, however kvm_apicv_activated might still be true if there are no reasons to disable APICv; in fact it is quite likely that there is none because APICv is inhibited by specific configurations of the local APIC and those configurations cannot be programmed. This triggers a WARN: WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu)); To avoid this, introduce another cause for APICv inhibition, namely the absence of an in-kernel local APIC. This cause is enabled by default, and is dropped by either KVM_CREATE_IRQCHIP or the enabling of KVM_CAP_IRQCHIP_SPLIT. Reported-by: Ignat Korchagin Fixes: ee49a8932971 ("KVM: x86: Move SVM's APICv sanity check to common x86", 2021-10-22) Reviewed-by: Maxim Levitsky Reviewed-by: Sean Christopherson Tested-by: Ignat Korchagin Message-Id: <20211130123746.293379-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/avic.c | 1 + arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 9 +++++---- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6ac61f85e07b..860ed500580c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1036,6 +1036,7 @@ struct kvm_x86_msr_filter { #define APICV_INHIBIT_REASON_PIT_REINJ 4 #define APICV_INHIBIT_REASON_X2APIC 5 #define APICV_INHIBIT_REASON_BLOCKIRQ 6 +#define APICV_INHIBIT_REASON_ABSENT 7 struct kvm_arch { unsigned long n_used_mmu_pages; diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 9d6066eb7c10..8f9af7b7dbbe 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -900,6 +900,7 @@ out: bool svm_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | + BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_NESTED) | BIT(APICV_INHIBIT_REASON_IRQWIN) | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f90448809690..9453743ce0c4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7525,6 +7525,7 @@ static void hardware_unsetup(void) static bool vmx_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | + BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ee1a039b490..e0aa4dd53c7f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5740,6 +5740,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; + kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); r = 0; split_irqchip_unlock: mutex_unlock(&kvm->lock); @@ -6120,6 +6121,7 @@ set_identity_unlock: /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; + kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); create_irqchip_unlock: mutex_unlock(&kvm->lock); break; @@ -8818,10 +8820,9 @@ static void kvm_apicv_init(struct kvm *kvm) { init_rwsem(&kvm->arch.apicv_update_lock); - if (enable_apicv) - clear_bit(APICV_INHIBIT_REASON_DISABLE, - &kvm->arch.apicv_inhibit_reasons); - else + set_bit(APICV_INHIBIT_REASON_ABSENT, + &kvm->arch.apicv_inhibit_reasons); + if (!enable_apicv) set_bit(APICV_INHIBIT_REASON_DISABLE, &kvm->arch.apicv_inhibit_reasons); } From bfbb307c628676929c2d329da0daf9d22afa8ad2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Nov 2021 15:53:37 +0300 Subject: [PATCH 106/253] KVM: VMX: Set failure code in prepare_vmcs02() The error paths in the prepare_vmcs02() function are supposed to set *entry_failure_code but this path does not. It leads to using an uninitialized variable in the caller. Fixes: 71f7347025bf ("KVM: nVMX: Load GUEST_IA32_PERF_GLOBAL_CTRL MSR on VM-Entry") Signed-off-by: Dan Carpenter Message-Id: <20211130125337.GB24578@kili> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 64f2828035c2..9c941535f78c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2591,8 +2591,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->guest_ia32_perf_global_ctrl))) + vmcs12->guest_ia32_perf_global_ctrl))) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; return -EINVAL; + } kvm_rsp_write(vcpu, vmcs12->guest_rsp); kvm_rip_write(vcpu, vmcs12->guest_rip); From a955cad84cdaffa282b3cf8f5ce69e9e5655e585 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 20 Nov 2021 04:50:22 +0000 Subject: [PATCH 107/253] KVM: x86/mmu: Retry page fault if root is invalidated by memslot update Bail from the page fault handler if the root shadow page was obsoleted by a memslot update. Do the check _after_ acuiring mmu_lock, as the TDP MMU doesn't rely on the memslot/MMU generation, and instead relies on the root being explicit marked invalid by kvm_mmu_zap_all_fast(), which takes mmu_lock for write. For the TDP MMU, inserting a SPTE into an obsolete root can leak a SP if kvm_tdp_mmu_zap_invalidated_roots() has already zapped the SP, i.e. has moved past the gfn associated with the SP. For other MMUs, the resulting behavior is far more convoluted, though unlikely to be truly problematic. Installing SPs/SPTEs into the obsolete root isn't directly problematic, as the obsolete root will be unloaded and dropped before the vCPU re-enters the guest. But because the legacy MMU tracks shadow pages by their role, any SP created by the fault can can be reused in the new post-reload root. Again, that _shouldn't_ be problematic as any leaf child SPTEs will be created for the current/valid memslot generation, and kvm_mmu_get_page() will not reuse child SPs from the old generation as they will be flagged as obsolete. But, given that continuing with the fault is pointess (the root will be unloaded), apply the check to all MMUs. Fixes: b7cccd397f31 ("KVM: x86/mmu: Fast invalidation for TDP MMU") Cc: stable@vger.kernel.org Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20211120045046.3940942-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 23 +++++++++++++++++++++-- arch/x86/kvm/mmu/paging_tmpl.h | 3 ++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6354297e92ae..e2e1d012df22 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1936,7 +1936,11 @@ static void mmu_audit_disable(void) { } static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { - return sp->role.invalid || + if (sp->role.invalid) + return true; + + /* TDP MMU pages due not use the MMU generation. */ + return !sp->tdp_mmu_page && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } @@ -3976,6 +3980,20 @@ out_retry: return true; } +/* + * Returns true if the page fault is stale and needs to be retried, i.e. if the + * root was invalidated by a memslot update or a relevant mmu_notifier fired. + */ +static bool is_page_fault_stale(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault, int mmu_seq) +{ + if (is_obsolete_sp(vcpu->kvm, to_shadow_page(vcpu->arch.mmu->root_hpa))) + return true; + + return fault->slot && + mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva); +} + static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu); @@ -4013,8 +4031,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else write_lock(&vcpu->kvm->mmu_lock); - if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) + if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; + r = make_mmu_pages_available(vcpu); if (r) goto out_unlock; diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f87d36898c44..708a5d297fe1 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -911,7 +911,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); - if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva)) + + if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); From ce39d473d1edd6914e1eed097deb0c0612baa8f6 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 1 Dec 2021 11:40:10 +0800 Subject: [PATCH 108/253] arm64: update PAC description for kernel Remove the paragraph which has nothing to do with the kernel and add PAC description related to kernel. Suggested-by: Mark Rutland Signed-off-by: Kuan-Ying Lee Link: https://lore.kernel.org/r/20211201034014.20048-1-Kuan-Ying.Lee@mediatek.com Signed-off-by: Will Deacon --- Documentation/arm64/pointer-authentication.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arm64/pointer-authentication.rst index f127666ea3a8..e5dad2e40aa8 100644 --- a/Documentation/arm64/pointer-authentication.rst +++ b/Documentation/arm64/pointer-authentication.rst @@ -53,11 +53,10 @@ The number of bits that the PAC occupies in a pointer is 55 minus the virtual address size configured by the kernel. For example, with a virtual address size of 48, the PAC is 7 bits wide. -Recent versions of GCC can compile code with APIAKey-based return -address protection when passed the -msign-return-address option. This -uses instructions in the HINT space (unless -march=armv8.3-a or higher -is also passed), and such code can run on systems without the pointer -authentication extension. +When ARM64_PTR_AUTH_KERNEL is selected, the kernel will be compiled +with HINT space pointer authentication instructions protecting +function returns. Kernels built with this option will work on hardware +with or without pointer authentication support. In addition to exec(), keys can also be reinitialized to random values using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY, From 2f2183243f52a8ee77eecba4796316606701d101 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 Nov 2021 12:18:49 +0000 Subject: [PATCH 109/253] arm64: kexec: use __pa_symbol(empty_zero_page) In machine_kexec_post_load() we use __pa() on `empty_zero_page`, so that we can use the physical address during arm64_relocate_new_kernel() to switch TTBR1 to a new set of tables. While `empty_zero_page` is part of the old kernel, we won't clobber it until after this switch, so using it is benign. However, `empty_zero_page` is part of the kernel image rather than a linear map address, so it is not correct to use __pa(x), and we should instead use __pa_symbol(x) or __pa(lm_alias(x)). Otherwise, when the kernel is built with DEBUG_VIRTUAL, we'll encounter splats as below, as I've seen when fuzzing v5.16-rc3 with Syzkaller: | ------------[ cut here ]------------ | virt_to_phys used for non-linear address: 000000008492561a (empty_zero_page+0x0/0x1000) | WARNING: CPU: 3 PID: 11492 at arch/arm64/mm/physaddr.c:15 __virt_to_phys+0x120/0x1c0 arch/arm64/mm/physaddr.c:12 | CPU: 3 PID: 11492 Comm: syz-executor.0 Not tainted 5.16.0-rc3-00001-g48bd452a045c #1 | Hardware name: linux,dummy-virt (DT) | pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : __virt_to_phys+0x120/0x1c0 arch/arm64/mm/physaddr.c:12 | lr : __virt_to_phys+0x120/0x1c0 arch/arm64/mm/physaddr.c:12 | sp : ffff80001af17bb0 | x29: ffff80001af17bb0 x28: ffff1cc65207b400 x27: ffffb7828730b120 | x26: 0000000000000e11 x25: 0000000000000000 x24: 0000000000000001 | x23: ffffb7828963e000 x22: ffffb78289644000 x21: 0000600000000000 | x20: 000000000000002d x19: 0000b78289644000 x18: 0000000000000000 | x17: 74706d6528206131 x16: 3635323934383030 x15: 303030303030203a | x14: 1ffff000035e2eb8 x13: ffff6398d53f4f0f x12: 1fffe398d53f4f0e | x11: 1fffe398d53f4f0e x10: ffff6398d53f4f0e x9 : ffffb7827c6f76dc | x8 : ffff1cc6a9fa7877 x7 : 0000000000000001 x6 : ffff6398d53f4f0f | x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff1cc66f2a99c0 | x2 : 0000000000040000 x1 : d7ce7775b09b5d00 x0 : 0000000000000000 | Call trace: | __virt_to_phys+0x120/0x1c0 arch/arm64/mm/physaddr.c:12 | machine_kexec_post_load+0x284/0x670 arch/arm64/kernel/machine_kexec.c:150 | do_kexec_load+0x570/0x670 kernel/kexec.c:155 | __do_sys_kexec_load kernel/kexec.c:250 [inline] | __se_sys_kexec_load kernel/kexec.c:231 [inline] | __arm64_sys_kexec_load+0x1d8/0x268 kernel/kexec.c:231 | __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] | invoke_syscall+0x90/0x2e0 arch/arm64/kernel/syscall.c:52 | el0_svc_common.constprop.2+0x1e4/0x2f8 arch/arm64/kernel/syscall.c:142 | do_el0_svc+0xf8/0x150 arch/arm64/kernel/syscall.c:181 | el0_svc+0x60/0x248 arch/arm64/kernel/entry-common.c:603 | el0t_64_sync_handler+0x90/0xb8 arch/arm64/kernel/entry-common.c:621 | el0t_64_sync+0x180/0x184 arch/arm64/kernel/entry.S:572 | irq event stamp: 2428 | hardirqs last enabled at (2427): [] __up_console_sem+0xf0/0x118 kernel/printk/printk.c:255 | hardirqs last disabled at (2428): [] el1_dbg+0x28/0x80 arch/arm64/kernel/entry-common.c:375 | softirqs last enabled at (2424): [] softirq_handle_end kernel/softirq.c:401 [inline] | softirqs last enabled at (2424): [] __do_softirq+0xa28/0x11e4 kernel/softirq.c:587 | softirqs last disabled at (2417): [] do_softirq_own_stack include/asm-generic/softirq_stack.h:10 [inline] | softirqs last disabled at (2417): [] invoke_softirq kernel/softirq.c:439 [inline] | softirqs last disabled at (2417): [] __irq_exit_rcu kernel/softirq.c:636 [inline] | softirqs last disabled at (2417): [] irq_exit_rcu+0x53c/0x688 kernel/softirq.c:648 | ---[ end trace 0ca578534e7ca938 ]--- With or without DEBUG_VIRTUAL __pa() will fall back to __kimg_to_phys() for non-linear addresses, and will happen to do the right thing in this case, even with the warning. But we should not depend upon this, and to keep the warning useful we should fix this case. Fix this issue by using __pa_symbol(), which handles kernel image addresses (and checks its input is a kernel image address). This matches what we do elsewhere, e.g. in arch/arm64/include/asm/pgtable.h: | #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) Fixes: 3744b5280e67 ("arm64: kexec: install a copy of the linear-map") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Pasha Tatashin Cc: Will Deacon Reviewed-by: Pasha Tatashin Link: https://lore.kernel.org/r/20211130121849.3319010-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 1038494135c8..6fb31c117ebe 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -147,7 +147,7 @@ int machine_kexec_post_load(struct kimage *kimage) if (rc) return rc; kimage->arch.ttbr1 = __pa(trans_pgd); - kimage->arch.zero_page = __pa(empty_zero_page); + kimage->arch.zero_page = __pa_symbol(empty_zero_page); reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); From 35b6b28e69985eafb20b3b2c7bd6eca452b56b53 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 29 Nov 2021 13:57:09 +0000 Subject: [PATCH 110/253] arm64: ftrace: add missing BTIs When branch target identifiers are in use, code reachable via an indirect branch requires a BTI landing pad at the branch target site. When building FTRACE_WITH_REGS atop patchable-function-entry, we miss BTIs at the start start of the `ftrace_caller` and `ftrace_regs_caller` trampolines, and when these are called from a module via a PLT (which will use a `BR X16`), we will encounter a BTI failure, e.g. | # insmod lkdtm.ko | lkdtm: No crash points registered, enable through debugfs | # echo function_graph > /sys/kernel/debug/tracing/current_tracer | # cat /sys/kernel/debug/provoke-crash/DIRECT | Unhandled 64-bit el1h sync exception on CPU0, ESR 0x34000001 -- BTI | CPU: 0 PID: 174 Comm: cat Not tainted 5.16.0-rc2-dirty #3 | Hardware name: linux,dummy-virt (DT) | pstate: 60400405 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=jc) | pc : ftrace_caller+0x0/0x3c | lr : lkdtm_debugfs_open+0xc/0x20 [lkdtm] | sp : ffff800012e43b00 | x29: ffff800012e43b00 x28: 0000000000000000 x27: ffff800012e43c88 | x26: 0000000000000000 x25: 0000000000000000 x24: ffff0000c171f200 | x23: ffff0000c27b1e00 x22: ffff0000c2265240 x21: ffff0000c23c8c30 | x20: ffff8000090ba380 x19: 0000000000000000 x18: 0000000000000000 | x17: 0000000000000000 x16: ffff80001002bb4c x15: 0000000000000000 | x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000900ff0 | x11: ffff0000c4166310 x10: ffff800012e43b00 x9 : ffff8000104f2384 | x8 : 0000000000000001 x7 : 0000000000000000 x6 : 000000000000003f | x5 : 0000000000000040 x4 : ffff800012e43af0 x3 : 0000000000000001 | x2 : ffff8000090b0000 x1 : ffff0000c171f200 x0 : ffff0000c23c8c30 | Kernel panic - not syncing: Unhandled exception | CPU: 0 PID: 174 Comm: cat Not tainted 5.16.0-rc2-dirty #3 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x1a4 | show_stack+0x24/0x30 | dump_stack_lvl+0x68/0x84 | dump_stack+0x1c/0x38 | panic+0x168/0x360 | arm64_exit_nmi.isra.0+0x0/0x80 | el1h_64_sync_handler+0x68/0xd4 | el1h_64_sync+0x78/0x7c | ftrace_caller+0x0/0x3c | do_dentry_open+0x134/0x3b0 | vfs_open+0x38/0x44 | path_openat+0x89c/0xe40 | do_filp_open+0x8c/0x13c | do_sys_openat2+0xbc/0x174 | __arm64_sys_openat+0x6c/0xbc | invoke_syscall+0x50/0x120 | el0_svc_common.constprop.0+0xdc/0x100 | do_el0_svc+0x84/0xa0 | el0_svc+0x28/0x80 | el0t_64_sync_handler+0xa8/0x130 | el0t_64_sync+0x1a0/0x1a4 | SMP: stopping secondary CPUs | Kernel Offset: disabled | CPU features: 0x0,00000f42,da660c5f | Memory Limit: none | ---[ end Kernel panic - not syncing: Unhandled exception ]--- Fix this by adding the required `BTI C`, as we only require these to be reachable via BL for direct calls or BR X16/X17 for PLTs. For now, these are open-coded in the function prologue, matching the style of the `__hwasan_tag_mismatch` trampoline. In future we may wish to consider adding a new SYM_CODE_START_*() variant which has an implicit BTI. When ftrace is built atop mcount, the trampolines are marked with SYM_FUNC_START(), and so get an implicit BTI. We may need to change these over to SYM_CODE_START() in future for RELIABLE_STACKTRACE, in case we need to apply special care aroud the return address being rewritten. Fixes: 97fed779f2a6 ("arm64: bti: Provide Kconfig for kernel mode BTI") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20211129135709.2274019-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-ftrace.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index b3e4f9a088b1..8cf970d219f5 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -77,11 +77,17 @@ .endm SYM_CODE_START(ftrace_regs_caller) +#ifdef BTI_C + BTI_C +#endif ftrace_regs_entry 1 b ftrace_common SYM_CODE_END(ftrace_regs_caller) SYM_CODE_START(ftrace_caller) +#ifdef BTI_C + BTI_C +#endif ftrace_regs_entry 0 b ftrace_common SYM_CODE_END(ftrace_caller) From e11b02df60bdf2ac6bb8eaed525db4f02415a902 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Nov 2021 17:32:35 +0100 Subject: [PATCH 111/253] gfs2: Fix remote demote of weak glock holders When we mock up a temporary holder in gfs2_glock_cb to demote weak holders in response to a remote locking conflict, we don't set the HIF_HOLDER flag. This causes function may_grant to BUG. Fix by setting the missing HIF_HOLDER flag in the mock glock holder. In addition, define the mock glock holder where it is used. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8dbd6fe66420..44a7a4288956 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1857,7 +1857,6 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) { - struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, }; unsigned long delay = 0; unsigned long holdtime; unsigned long now = jiffies; @@ -1890,8 +1889,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) * keep the glock until the last strong holder is done with it. */ if (!find_first_strong_holder(gl)) { - if (state == LM_ST_UNLOCKED) - mock_gh.gh_state = LM_ST_EXCLUSIVE; + struct gfs2_holder mock_gh = { + .gh_gl = gl, + .gh_state = (state == LM_ST_UNLOCKED) ? + LM_ST_EXCLUSIVE : state, + .gh_iflags = BIT(HIF_HOLDER) + }; + demote_incompat_holders(gl, &mock_gh); } handle_callback(gl, state, delay, true); From b8e12e3599ad61b25478159b290d94d66326b3c3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 29 Nov 2021 03:35:00 +0100 Subject: [PATCH 112/253] gfs2: gfs2_inode_lookup cleanup In gfs2_inode_lookup, once the inode has been looked up, we check if the inode generation (no_formal_ino) is the one we're looking for. If it isn't and the inode wasn't in the inode cache, we discard the newly looked up inode. This is unnecessary, complicates the code, and makes future changes to gfs2_inode_lookup harder, so change the code to retain newly looked up inodes instead. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6424b903e885..806357f0c7ee 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -208,20 +208,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, gfs2_glock_dq_uninit(&i_gh); gfs2_set_iop(inode); + unlock_new_inode(inode); } if (no_formal_ino && ip->i_no_formal_ino && no_formal_ino != ip->i_no_formal_ino) { - error = -ESTALE; - if (inode->i_state & I_NEW) - goto fail; iput(inode); - return ERR_PTR(error); + return ERR_PTR(-ESTALE); } - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - return inode; fail: From 5f6e13baebf31d71779617b45fbe88ed62f121dc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 29 Nov 2021 10:50:41 +0100 Subject: [PATCH 113/253] gfs2: gfs2_inode_lookup rework Rework gfs2_inode_lookup() to only set up the new inode's glocks after verifying that the new inode is valid. There is no need for flushing the inode glock work queue anymore now, so remove that as well. While at it, get rid of the useless wrapper around iget5_locked() and its unnecessary is_bad_inode() check. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 84 +++++++++++++++++++------------------------------ 1 file changed, 33 insertions(+), 51 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 806357f0c7ee..d73b2933fdb8 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -40,37 +40,6 @@ static const struct inode_operations gfs2_file_iops; static const struct inode_operations gfs2_dir_iops; static const struct inode_operations gfs2_symlink_iops; -static int iget_test(struct inode *inode, void *opaque) -{ - u64 no_addr = *(u64 *)opaque; - - return GFS2_I(inode)->i_no_addr == no_addr; -} - -static int iget_set(struct inode *inode, void *opaque) -{ - u64 no_addr = *(u64 *)opaque; - - GFS2_I(inode)->i_no_addr = no_addr; - inode->i_ino = no_addr; - return 0; -} - -static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) -{ - struct inode *inode; - -repeat: - inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr); - if (!inode) - return inode; - if (is_bad_inode(inode)) { - iput(inode); - goto repeat; - } - return inode; -} - /** * gfs2_set_iop - Sets inode operations * @inode: The inode with correct i_mode filled in @@ -104,6 +73,22 @@ static void gfs2_set_iop(struct inode *inode) } } +static int iget_test(struct inode *inode, void *opaque) +{ + u64 no_addr = *(u64 *)opaque; + + return GFS2_I(inode)->i_no_addr == no_addr; +} + +static int iget_set(struct inode *inode, void *opaque) +{ + u64 no_addr = *(u64 *)opaque; + + GFS2_I(inode)->i_no_addr = no_addr; + inode->i_ino = no_addr; + return 0; +} + /** * gfs2_inode_lookup - Lookup an inode * @sb: The super block @@ -132,12 +117,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, { struct inode *inode; struct gfs2_inode *ip; - struct gfs2_glock *io_gl = NULL; struct gfs2_holder i_gh; int error; gfs2_holder_mark_uninitialized(&i_gh); - inode = gfs2_iget(sb, no_addr); + inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr); if (!inode) return ERR_PTR(-ENOMEM); @@ -145,22 +129,16 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_glock *io_gl; error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (unlikely(error)) goto fail; - flush_delayed_work(&ip->i_gl->gl_work); - - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (unlikely(error)) - goto fail; - if (blktype != GFS2_BLKST_UNLINKED) - gfs2_cancel_delete_work(io_gl); if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) { /* * The GL_SKIP flag indicates to skip reading the inode - * block. We read the inode with gfs2_inode_refresh + * block. We read the inode when instantiating it * after possibly checking the block type. */ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, @@ -181,24 +159,31 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, } } - glock_set_object(ip->i_gl, ip); set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); + + error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (unlikely(error)) goto fail; - glock_set_object(ip->i_iopen_gh.gh_gl, ip); + if (blktype != GFS2_BLKST_UNLINKED) + gfs2_cancel_delete_work(io_gl); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); gfs2_glock_put(io_gl); - io_gl = NULL; + if (unlikely(error)) + goto fail; /* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */ inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1); inode->i_atime.tv_nsec = 0; + glock_set_object(ip->i_gl, ip); + if (type == DT_UNKNOWN) { /* Inode glock must be locked already */ error = gfs2_instantiate(&i_gh); - if (error) + if (error) { + glock_clear_object(ip->i_gl, ip); goto fail; + } } else { ip->i_no_formal_ino = no_formal_ino; inode->i_mode = DT2IF(type); @@ -206,6 +191,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); + glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_set_iop(inode); unlock_new_inode(inode); @@ -220,12 +206,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, return inode; fail: - if (gfs2_holder_initialized(&ip->i_iopen_gh)) { - glock_clear_object(ip->i_iopen_gh.gh_gl, ip); + if (gfs2_holder_initialized(&ip->i_iopen_gh)) gfs2_glock_dq_uninit(&ip->i_iopen_gh); - } - if (io_gl) - gfs2_glock_put(io_gl); if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); iget_failed(inode); From 3d36e57ff768dbb919c06ffedec4bfe4587c6254 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 30 Nov 2021 18:26:15 +0100 Subject: [PATCH 114/253] gfs2: gfs2_create_inode rework When gfs2_lookup_by_inum() calls gfs2_inode_lookup() for an uncached inode, gfs2_inode_lookup() will place a new tentative inode into the inode cache before verifying that there is a valid inode at the given address. This can race with gfs2_create_inode() which doesn't check for duplicates inodes. gfs2_create_inode() will try to assign the new inode to the corresponding inode glock, and glock_set_object() will complain that the glock is still in use by gfs2_inode_lookup's tentative inode. We noticed this bug after adding commit 486408d690e1 ("gfs2: Cancel remote delete work asynchronously") which allowed delete_work_func() to race with gfs2_create_inode(), but the same race exists for open-by-handle. Fix that by switching from insert_inode_hash() to insert_inode_locked4(), which does check for duplicate inodes. We know we've just managed to to allocate the new inode, so an inode tentatively created by gfs2_inode_lookup() will eventually go away and insert_inode_locked4() will always succeed. In addition, don't flush the inode glock work anymore (this can now only make things worse) and clean up glock_{set,clear}_object for the inode glock somewhat. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d73b2933fdb8..89905f4f29bb 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -707,18 +707,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (error) goto fail_free_inode; - flush_delayed_work(&ip->i_gl->gl_work); error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) goto fail_free_inode; gfs2_cancel_delete_work(io_gl); + error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); + BUG_ON(error); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); if (error) goto fail_gunlock2; - glock_set_object(ip->i_gl, ip); error = gfs2_trans_begin(sdp, blocks, 0); if (error) goto fail_gunlock2; @@ -734,9 +735,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + glock_set_object(ip->i_gl, ip); glock_set_object(io_gl, ip); gfs2_set_iop(inode); - insert_inode_hash(inode); free_vfs_inode = 0; /* After this point, the inode is no longer considered free. Any failures need to undo @@ -778,17 +779,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_glock_dq_uninit(ghs + 1); gfs2_glock_put(io_gl); gfs2_qa_put(dip); + unlock_new_inode(inode); return error; fail_gunlock3: + glock_clear_object(ip->i_gl, ip); glock_clear_object(io_gl, ip); gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: - glock_clear_object(io_gl, ip); gfs2_glock_put(io_gl); fail_free_inode: if (ip->i_gl) { - glock_clear_object(ip->i_gl, ip); if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } @@ -806,7 +807,10 @@ fail_gunlock: mark_inode_dirty(inode); set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags); - iput(inode); + if (inode->i_state & I_NEW) + iget_failed(inode); + else + iput(inode); } if (gfs2_holder_initialized(ghs + 1)) gfs2_glock_dq_uninit(ghs + 1); From 53e87e3cdc155f20c3417b689df8d2ac88d79576 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Oct 2021 16:10:54 +0200 Subject: [PATCH 115/253] timers/nohz: Last resort update jiffies on nohz_full IRQ entry When at least one CPU runs in nohz_full mode, a dedicated timekeeper CPU is guaranteed to stay online and to never stop its tick. Meanwhile on some rare case, the dedicated timekeeper may be running with interrupts disabled for a while, such as in stop_machine. If jiffies stop being updated, a nohz_full CPU may end up endlessly programming the next tick in the past, taking the last jiffies update monotonic timestamp as a stale base, resulting in an tick storm. Here is a scenario where it matters: 0) CPU 0 is the timekeeper and CPU 1 a nohz_full CPU. 1) A stop machine callback is queued to execute somewhere. 2) CPU 0 reaches MULTI_STOP_DISABLE_IRQ while CPU 1 is still in MULTI_STOP_PREPARE. Hence CPU 0 can't do its timekeeping duty. CPU 1 can still take IRQs. 3) CPU 1 receives an IRQ which queues a timer callback one jiffy forward. 4) On IRQ exit, CPU 1 schedules the tick one jiffy forward, taking last_jiffies_update as a base. But last_jiffies_update hasn't been updated for 2 jiffies since the timekeeper has interrupts disabled. 5) clockevents_program_event(), which relies on ktime_get(), observes that the expiration is in the past and therefore programs the min delta event on the clock. 6) The tick fires immediately, goto 3) 7) Tick storm, the nohz_full CPU is drown and takes ages to reach MULTI_STOP_DISABLE_IRQ, which is the only way out of this situation. Solve this with unconditionally updating jiffies if the value is stale on nohz_full IRQ entry. IRQs and other disturbances are expected to be rare enough on nohz_full for the unconditional call to ktime_get() to actually matter. Reported-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Link: https://lore.kernel.org/r/20211026141055.57358-2-frederic@kernel.org --- kernel/softirq.c | 3 ++- kernel/time/tick-sched.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 322b65d45676..41f470929e99 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -595,7 +595,8 @@ void irq_enter_rcu(void) { __irq_enter_raw(); - if (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)) + if (tick_nohz_full_cpu(smp_processor_id()) || + (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET))) tick_irq_enter(); account_hardirq_enter(current); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6bffe5af8cb1..17a283ce2b20 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1375,6 +1375,13 @@ static inline void tick_nohz_irq_enter(void) now = ktime_get(); if (ts->idle_active) tick_nohz_stop_idle(ts, now); + /* + * If all CPUs are idle. We may need to update a stale jiffies value. + * Note nohz_full is a special case: a timekeeper is guaranteed to stay + * alive but it might be busy looping with interrupts disabled in some + * rare case (typically stop machine). So we must make sure we have a + * last resort. + */ if (ts->tick_stopped) tick_nohz_update_jiffies(now); } From e7f2be115f0746b969c0df14c0d182f65f005ca5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Oct 2021 16:10:55 +0200 Subject: [PATCH 116/253] sched/cputime: Fix getrusage(RUSAGE_THREAD) with nohz_full getrusage(RUSAGE_THREAD) with nohz_full may return shorter utime/stime than the actual time. task_cputime_adjusted() snapshots utime and stime and then adjust their sum to match the scheduler maintained cputime.sum_exec_runtime. Unfortunately in nohz_full, sum_exec_runtime is only updated once per second in the worst case, causing a discrepancy against utime and stime that can be updated anytime by the reader using vtime. To fix this situation, perform an update of cputime.sum_exec_runtime when the cputime snapshot reports the task as actually running while the tick is disabled. The related overhead is then contained within the relevant situations. Reported-by: Hasegawa Hitomi Signed-off-by: Frederic Weisbecker Signed-off-by: Hasegawa Hitomi Signed-off-by: Thomas Gleixner Tested-by: Masayoshi Mizuma Acked-by: Phil Auld Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org --- include/linux/sched/cputime.h | 5 +++-- kernel/sched/cputime.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6c9f19a33865..ce3c58286062 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -18,15 +18,16 @@ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, +extern bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else -static inline void task_cputime(struct task_struct *t, +static inline bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; + return false; } static inline u64 task_gtime(struct task_struct *t) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 872e481d5098..9392aea1804e 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -615,7 +615,8 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) .sum_exec_runtime = p->se.sum_exec_runtime, }; - task_cputime(p, &cputime.utime, &cputime.stime); + if (task_cputime(p, &cputime.utime, &cputime.stime)) + cputime.sum_exec_runtime = task_sched_runtime(p); cputime_adjust(&cputime, &p->prev_cputime, ut, st); } EXPORT_SYMBOL_GPL(task_cputime_adjusted); @@ -828,19 +829,21 @@ u64 task_gtime(struct task_struct *t) * add up the pending nohz execution time since the last * cputime snapshot. */ -void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) +bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { struct vtime *vtime = &t->vtime; unsigned int seq; u64 delta; + int ret; if (!vtime_accounting_enabled()) { *utime = t->utime; *stime = t->stime; - return; + return false; } do { + ret = false; seq = read_seqcount_begin(&vtime->seqcount); *utime = t->utime; @@ -850,6 +853,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) if (vtime->state < VTIME_SYS) continue; + ret = true; delta = vtime_delta(vtime); /* @@ -861,6 +865,8 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) else *utime += vtime->utime + delta; } while (read_seqcount_retry(&vtime->seqcount, seq)); + + return ret; } static int vtime_state_fetch(struct vtime *vtime, int cpu) From f83baa0cb6cfc92ebaf7f9d3a99d7e34f2e77a8a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Dec 2021 19:35:01 +0100 Subject: [PATCH 117/253] HID: add hid_is_usb() function to make it simpler for USB detection A number of HID drivers already call hid_is_using_ll_driver() but only for the detection of if this is a USB device or not. Make this more obvious by creating hid_is_usb() and calling the function that way. Also converts the existing hid_is_using_ll_driver() functions to use the new call. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: stable@vger.kernel.org Tested-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211201183503.2373082-1-gregkh@linuxfoundation.org --- drivers/hid/hid-asus.c | 6 ++---- drivers/hid/hid-logitech-dj.c | 2 +- drivers/hid/hid-u2fzero.c | 2 +- drivers/hid/hid-uclogic-params.c | 3 +-- drivers/hid/wacom_sys.c | 2 +- include/linux/hid.h | 5 +++++ 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index f3ecddc519ee..08c9a9a60ae4 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1028,8 +1028,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) if (drvdata->quirks & QUIRK_IS_MULTITOUCH) drvdata->tp = &asus_i2c_tp; - if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && - hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); if (intf->altsetting->desc.bInterfaceNumber == T100_TPAD_INTF) { @@ -1057,8 +1056,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) drvdata->tp = &asus_t100chi_tp; } - if ((drvdata->quirks & QUIRK_MEDION_E1239T) && - hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if ((drvdata->quirks & QUIRK_MEDION_E1239T) && hid_is_usb(hdev)) { struct usb_host_interface *alt = to_usb_interface(hdev->dev.parent)->altsetting; diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index a0017b010c34..7106b921b53c 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1777,7 +1777,7 @@ static int logi_dj_probe(struct hid_device *hdev, case recvr_type_bluetooth: no_dj_interfaces = 2; break; case recvr_type_dinovo: no_dj_interfaces = 2; break; } - if (hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (hid_is_usb(hdev)) { intf = to_usb_interface(hdev->dev.parent); if (intf && intf->altsetting->desc.bInterfaceNumber >= no_dj_interfaces) { diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c index 31ea7fc69916..ad489caf53ad 100644 --- a/drivers/hid/hid-u2fzero.c +++ b/drivers/hid/hid-u2fzero.c @@ -311,7 +311,7 @@ static int u2fzero_probe(struct hid_device *hdev, unsigned int minor; int ret; - if (!hid_is_using_ll_driver(hdev, &usb_hid_driver)) + if (!hid_is_usb(hdev)) return -EINVAL; dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 3d67b748a3b9..adff1bd68d9f 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -843,8 +843,7 @@ int uclogic_params_init(struct uclogic_params *params, struct uclogic_params p = {0, }; /* Check arguments */ - if (params == NULL || hdev == NULL || - !hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (params == NULL || hdev == NULL || !hid_is_usb(hdev)) { rc = -EINVAL; goto cleanup; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 2717d39600b4..22d73772fbc5 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2214,7 +2214,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) { char *product_name = wacom->hdev->name; - if (hid_is_using_ll_driver(wacom->hdev, &usb_hid_driver)) { + if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); product_name = dev->product; diff --git a/include/linux/hid.h b/include/linux/hid.h index 9e067f937dbc..f453be385bd4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -840,6 +840,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 From 720ac467204a70308bd687927ed475afb904e11b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Dec 2021 19:35:02 +0100 Subject: [PATCH 118/253] HID: wacom: fix problems when device is not a valid USB device The wacom driver accepts devices of more than just USB types, but some code paths can cause problems if the device being controlled is not a USB device due to a lack of checking. Add the needed checks to ensure that the USB device accesses are only happening on a "real" USB device, and not one on some other bus. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: stable@vger.kernel.org Tested-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211201183503.2373082-2-gregkh@linuxfoundation.org --- drivers/hid/wacom_sys.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 22d73772fbc5..066c567dbaa2 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -726,7 +726,7 @@ static void wacom_retrieve_hid_descriptor(struct hid_device *hdev, * Skip the query for this type and modify defaults based on * interface number. */ - if (features->type == WIRELESS) { + if (features->type == WIRELESS && intf) { if (intf->cur_altsetting->desc.bInterfaceNumber == 0) features->device_type = WACOM_DEVICETYPE_WL_MONITOR; else @@ -2451,6 +2451,9 @@ static void wacom_wireless_work(struct work_struct *work) wacom_destroy_battery(wacom); + if (!usbdev) + return; + /* Stylus interface */ hdev1 = usb_get_intfdata(usbdev->config->interface[1]); wacom1 = hid_get_drvdata(hdev1); @@ -2730,8 +2733,6 @@ static void wacom_mode_change_work(struct work_struct *work) static int wacom_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - struct usb_device *dev = interface_to_usbdev(intf); struct wacom *wacom; struct wacom_wac *wacom_wac; struct wacom_features *features; @@ -2766,8 +2767,14 @@ static int wacom_probe(struct hid_device *hdev, wacom_wac->hid_data.inputmode = -1; wacom_wac->mode_report = -1; - wacom->usbdev = dev; - wacom->intf = intf; + if (hid_is_usb(hdev)) { + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + struct usb_device *dev = interface_to_usbdev(intf); + + wacom->usbdev = dev; + wacom->intf = intf; + } + mutex_init(&wacom->lock); INIT_DELAYED_WORK(&wacom->init_work, wacom_init_work); INIT_WORK(&wacom->wireless_work, wacom_wireless_work); From 93020953d0fa7035fd036ad87a47ae2b7aa4ae33 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Dec 2021 19:35:03 +0100 Subject: [PATCH 119/253] HID: check for valid USB device for many HID drivers Many HID drivers assume that the HID device assigned to them is a USB device as that was the only way HID devices used to be able to be created in Linux. However, with the additional ways that HID devices can be created for many different bus types, that is no longer true, so properly check that we have a USB device associated with the HID device before allowing a driver that makes this assumption to claim it. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Michael Zaidman Cc: Stefan Achatz Cc: Maxime Coquelin Cc: Alexandre Torgue Cc: linux-input@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Tested-by: Benjamin Tissoires [bentiss: amended for thrustmater.c hunk to apply] Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211201183503.2373082-3-gregkh@linuxfoundation.org --- drivers/hid/hid-chicony.c | 3 +++ drivers/hid/hid-corsair.c | 7 ++++++- drivers/hid/hid-elan.c | 2 +- drivers/hid/hid-elo.c | 3 +++ drivers/hid/hid-ft260.c | 3 +++ drivers/hid/hid-holtek-kbd.c | 9 +++++++-- drivers/hid/hid-holtek-mouse.c | 9 +++++++++ drivers/hid/hid-lg.c | 10 ++++++++-- drivers/hid/hid-prodikeys.c | 10 ++++++++-- drivers/hid/hid-roccat-arvo.c | 3 +++ drivers/hid/hid-roccat-isku.c | 3 +++ drivers/hid/hid-roccat-kone.c | 3 +++ drivers/hid/hid-roccat-koneplus.c | 3 +++ drivers/hid/hid-roccat-konepure.c | 3 +++ drivers/hid/hid-roccat-kovaplus.c | 3 +++ drivers/hid/hid-roccat-lua.c | 3 +++ drivers/hid/hid-roccat-pyra.c | 3 +++ drivers/hid/hid-roccat-ryos.c | 3 +++ drivers/hid/hid-roccat-savu.c | 3 +++ drivers/hid/hid-samsung.c | 3 +++ drivers/hid/hid-sony.c | 6 +++++- drivers/hid/hid-thrustmaster.c | 3 +++ drivers/hid/hid-uclogic-core.c | 3 +++ 23 files changed, 92 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index ca556d39da2a..f04d2aa23efe 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -114,6 +114,9 @@ static int ch_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + hdev->quirks |= HID_QUIRK_INPUT_PER_APP; ret = hid_parse(hdev); if (ret) { diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 902a60e249ed..8c895c820b67 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -553,7 +553,12 @@ static int corsair_probe(struct hid_device *dev, const struct hid_device_id *id) int ret; unsigned long quirks = id->driver_data; struct corsair_drvdata *drvdata; - struct usb_interface *usbif = to_usb_interface(dev->dev.parent); + struct usb_interface *usbif; + + if (!hid_is_usb(dev)) + return -EINVAL; + + usbif = to_usb_interface(dev->dev.parent); drvdata = devm_kzalloc(&dev->dev, sizeof(struct corsair_drvdata), GFP_KERNEL); diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c index 021049805bb7..3091355d48df 100644 --- a/drivers/hid/hid-elan.c +++ b/drivers/hid/hid-elan.c @@ -50,7 +50,7 @@ struct elan_drvdata { static int is_not_elan_touchpad(struct hid_device *hdev) { - if (hdev->bus == BUS_USB) { + if (hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); return (intf->altsetting->desc.bInterfaceNumber != diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index 383dfda8c12f..8e960d7b233b 100644 --- a/drivers/hid/hid-elo.c +++ b/drivers/hid/hid-elo.c @@ -230,6 +230,9 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id) int ret; struct usb_device *udev; + if (!hid_is_usb(hdev)) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index 8ee77f4afe9f..79505c64dbfe 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -915,6 +915,9 @@ static int ft260_probe(struct hid_device *hdev, const struct hid_device_id *id) struct ft260_get_chip_version_report version; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c index 0a38e8e9bc78..403506b9697e 100644 --- a/drivers/hid/hid-holtek-kbd.c +++ b/drivers/hid/hid-holtek-kbd.c @@ -140,12 +140,17 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, static int holtek_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - int ret = hid_parse(hdev); + struct usb_interface *intf; + int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + + ret = hid_parse(hdev); if (!ret) ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + intf = to_usb_interface(hdev->dev.parent); if (!ret && intf->cur_altsetting->desc.bInterfaceNumber == 1) { struct hid_input *hidinput; list_for_each_entry(hidinput, &hdev->inputs, list) { diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index 195b735b001d..b7172c48ef9f 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -62,6 +62,14 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +static int holtek_mouse_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + if (!hid_is_usb(hdev)) + return -EINVAL; + return 0; +} + static const struct hid_device_id holtek_mouse_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) }, @@ -83,6 +91,7 @@ static struct hid_driver holtek_mouse_driver = { .name = "holtek_mouse", .id_table = holtek_mouse_devices, .report_fixup = holtek_mouse_report_fixup, + .probe = holtek_mouse_probe, }; module_hid_driver(holtek_mouse_driver); diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index d40af911df63..fb3f7258009c 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -749,12 +749,18 @@ static int lg_raw_event(struct hid_device *hdev, struct hid_report *report, static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *iface; + __u8 iface_num; unsigned int connect_mask = HID_CONNECT_DEFAULT; struct lg_drv_data *drv_data; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + + iface = to_usb_interface(hdev->dev.parent); + iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + /* G29 only work with the 1st interface */ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) && (iface_num != 0)) { diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 2666af02d5c1..e4e9471d0f1e 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -798,12 +798,18 @@ static int pk_raw_event(struct hid_device *hdev, struct hid_report *report, static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - unsigned short ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *intf; + unsigned short ifnum; unsigned long quirks = id->driver_data; struct pk_device *pk; struct pcmidi_snd *pm = NULL; + if (!hid_is_usb(hdev)) + return -EINVAL; + + intf = to_usb_interface(hdev->dev.parent); + ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + pk = kzalloc(sizeof(*pk), GFP_KERNEL); if (pk == NULL) { hid_err(hdev, "can't alloc descriptor\n"); diff --git a/drivers/hid/hid-roccat-arvo.c b/drivers/hid/hid-roccat-arvo.c index 4556d2a50f75..d94ee0539421 100644 --- a/drivers/hid/hid-roccat-arvo.c +++ b/drivers/hid/hid-roccat-arvo.c @@ -344,6 +344,9 @@ static int arvo_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-isku.c b/drivers/hid/hid-roccat-isku.c index ce5f22519956..e95d59cd8d07 100644 --- a/drivers/hid/hid-roccat-isku.c +++ b/drivers/hid/hid-roccat-isku.c @@ -324,6 +324,9 @@ static int isku_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kone.c b/drivers/hid/hid-roccat-kone.c index ea17abc7ad52..76da04801ca9 100644 --- a/drivers/hid/hid-roccat-kone.c +++ b/drivers/hid/hid-roccat-kone.c @@ -749,6 +749,9 @@ static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-koneplus.c b/drivers/hid/hid-roccat-koneplus.c index 0316edf8c5bb..1896c69ea512 100644 --- a/drivers/hid/hid-roccat-koneplus.c +++ b/drivers/hid/hid-roccat-koneplus.c @@ -431,6 +431,9 @@ static int koneplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-konepure.c b/drivers/hid/hid-roccat-konepure.c index 5248b3c7cf78..cf8eeb33a125 100644 --- a/drivers/hid/hid-roccat-konepure.c +++ b/drivers/hid/hid-roccat-konepure.c @@ -133,6 +133,9 @@ static int konepure_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kovaplus.c b/drivers/hid/hid-roccat-kovaplus.c index 960012881570..6fb9b9563769 100644 --- a/drivers/hid/hid-roccat-kovaplus.c +++ b/drivers/hid/hid-roccat-kovaplus.c @@ -501,6 +501,9 @@ static int kovaplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-lua.c b/drivers/hid/hid-roccat-lua.c index 4a88a76d5c62..d5ddf0d68346 100644 --- a/drivers/hid/hid-roccat-lua.c +++ b/drivers/hid/hid-roccat-lua.c @@ -160,6 +160,9 @@ static int lua_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index 989927defe8d..4fcc8e7d276f 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -449,6 +449,9 @@ static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-ryos.c b/drivers/hid/hid-roccat-ryos.c index 3956a6c9c521..5bf1971a2b14 100644 --- a/drivers/hid/hid-roccat-ryos.c +++ b/drivers/hid/hid-roccat-ryos.c @@ -141,6 +141,9 @@ static int ryos_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-savu.c b/drivers/hid/hid-roccat-savu.c index 818701f7a028..a784bb4ee651 100644 --- a/drivers/hid/hid-roccat-savu.c +++ b/drivers/hid/hid-roccat-savu.c @@ -113,6 +113,9 @@ static int savu_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index 2e1c31156eca..cf5992e97094 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -152,6 +152,9 @@ static int samsung_probe(struct hid_device *hdev, int ret; unsigned int cmask = HID_CONNECT_DEFAULT; + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index d1b107d547f5..c186af552129 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -3000,7 +3000,6 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) sc->quirks = quirks; hid_set_drvdata(hdev, sc); sc->hdev = hdev; - usbdev = to_usb_device(sc->hdev->dev.parent->parent); ret = hid_parse(hdev); if (ret) { @@ -3043,6 +3042,11 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) } if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { + if (!hid_is_usb(hdev)) + return -EINVAL; + + usbdev = to_usb_device(sc->hdev->dev.parent->parent); + sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC); if (!sc->ghl_urb) return -ENOMEM; diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 3a5333424aa3..03b935ff02d5 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -274,6 +274,9 @@ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_i int ret = 0; struct tm_wheel *tm_wheel = NULL; + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed with error %d\n", ret); diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 6a9865dd703c..d8ab0139e5cd 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -164,6 +164,9 @@ static int uclogic_probe(struct hid_device *hdev, struct uclogic_drvdata *drvdata = NULL; bool params_initialized = false; + if (!hid_is_usb(hdev)) + return -EINVAL; + /* * libinput requires the pad interface to be on a different node * than the pen, so use QUIRK_MULTI_INPUT for all tablets. From f237d9028f844a86955fc9da59d7ac4a5c55d7d5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Dec 2021 12:48:19 +0100 Subject: [PATCH 120/253] HID: add USB_HID dependancy on some USB HID drivers Some HID drivers are only for USB drivers, yet did not depend on CONFIG_USB_HID. This was hidden by the fact that the USB functions were stubbed out in the past, but now that drivers are checking for USB devices properly, build errors can occur with some random configurations. Reported-by: kernel test robot Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211202114819.2511954-1-gregkh@linuxfoundation.org --- drivers/hid/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 9f5435b55949..828c2995ec34 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -214,7 +214,7 @@ config HID_CHICONY config HID_CORSAIR tristate "Corsair devices" - depends on HID && USB && LEDS_CLASS + depends on USB_HID && LEDS_CLASS help Support for Corsair devices that are not fully compliant with the HID standard. @@ -560,7 +560,7 @@ config HID_LENOVO config HID_LOGITECH tristate "Logitech devices" - depends on HID + depends on USB_HID depends on LEDS_CLASS default !EXPERT help @@ -951,7 +951,7 @@ config HID_SAITEK config HID_SAMSUNG tristate "Samsung InfraRed remote control or keyboards" - depends on HID + depends on USB_HID help Support for Samsung InfraRed remote control or keyboards. From 7998193bccc1c6e1537c5f3880fd0d5b949ec9d1 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 2 Dec 2021 10:53:31 +0100 Subject: [PATCH 121/253] HID: sony: fix error path in probe When the setup of the GHL fails, we are not calling hid_hw_stop(). This leads to the hidraw node not being released, meaning a crash whenever somebody attempts to open the file. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211202095334.14399-2-benjamin.tissoires@redhat.com --- drivers/hid/hid-sony.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index c186af552129..60ec2b29d54d 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -3037,19 +3037,23 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) */ if (!(hdev->claimed & HID_CLAIMED_INPUT)) { hid_err(hdev, "failed to claim input\n"); - hid_hw_stop(hdev); - return -ENODEV; + ret = -ENODEV; + goto err; } if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { - if (!hid_is_usb(hdev)) - return -EINVAL; + if (!hid_is_usb(hdev)) { + ret = -EINVAL; + goto err; + } usbdev = to_usb_device(sc->hdev->dev.parent->parent); sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!sc->ghl_urb) - return -ENOMEM; + if (!sc->ghl_urb) { + ret = -ENOMEM; + goto err; + } if (sc->quirks & GHL_GUITAR_PS3WIIU) ret = ghl_init_urb(sc, usbdev, ghl_ps3wiiu_magic_data, @@ -3059,7 +3063,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) ARRAY_SIZE(ghl_ps4_magic_data)); if (ret) { hid_err(hdev, "error preparing URB\n"); - return ret; + goto err; } timer_setup(&sc->ghl_poke_timer, ghl_magic_poke, 0); @@ -3068,6 +3072,10 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) } return ret; + +err: + hid_hw_stop(hdev); + return ret; } static void sony_remove(struct hid_device *hdev) From 918aa1ef104d286d16b9e7ef139a463ac7a296f0 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 2 Dec 2021 10:53:32 +0100 Subject: [PATCH 122/253] HID: bigbenff: prevent null pointer dereference When emulating the device through uhid, there is a chance we don't have output reports and so report_field is null. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211202095334.14399-3-benjamin.tissoires@redhat.com --- drivers/hid/hid-bigbenff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index db6da21ade06..74ad8bf98bfd 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -191,7 +191,7 @@ static void bigben_worker(struct work_struct *work) struct bigben_device, worker); struct hid_field *report_field = bigben->report->field[0]; - if (bigben->removed) + if (bigben->removed || !report_field) return; if (bigben->work_led) { From 72641d8d60401a5f1e1a0431ceaf928680d34418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 19 Nov 2021 06:09:30 -0800 Subject: [PATCH 123/253] Revert "drm/i915: Implement Wa_1508744258" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This workarounds are causing hangs, because I missed the fact that it needs to be enabled for all cases and disabled when doing a resolve pass. So KMD only needs to whitelist it and UMD will be the one setting it on per case. This reverts commit 28ec02c9cbebf3feeaf21a59df9dfbc02bda3362. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4145 Signed-off-by: José Roberto de Souza Fixes: 28ec02c9cbeb ("drm/i915: Implement Wa_1508744258") Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20211119140931.32791-1-jose.souza@intel.com (cherry picked from commit f3799ff16fcfacd44aee55db162830df461b631f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e1f362530889..ed73d9bc9d40 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -621,13 +621,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0, false); - - /* - * Wa_14012131227:dg1 - * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p - */ - wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, From 3c088b1e82cfb7c889823d39846d32079f190f3f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 26 Nov 2021 15:16:31 +0100 Subject: [PATCH 124/253] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 10 ++++++++-- arch/s390/configs/defconfig | 7 ++++++- arch/s390/configs/zfcpdump_defconfig | 2 ++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index fd825097cf04..b626bc6e0eaf 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -403,7 +403,6 @@ CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y @@ -476,6 +475,7 @@ CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_VXLAN=m CONFIG_BAREUDP=m +CONFIG_AMT=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -489,6 +489,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set @@ -571,6 +572,7 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +# CONFIG_DRM_DEBUG_MODESET_LOCK is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y @@ -775,12 +777,14 @@ CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y +CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y @@ -807,6 +811,7 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_KFENCE=y +CONFIG_KFENCE_STATIC_KEYS=y CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y @@ -842,6 +847,7 @@ CONFIG_FTRACE_STARTUP_TEST=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m +CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m CONFIG_DEBUG_ENTRY=y CONFIG_CIO_INJECT=y CONFIG_KUNIT=m @@ -860,7 +866,7 @@ CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y -CONFIG_KPROBES_SANITY_TEST=y +CONFIG_KPROBES_SANITY_TEST=m CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index c9c3cedff2d8..0056cab27372 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -394,7 +394,6 @@ CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y @@ -467,6 +466,7 @@ CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_VXLAN=m CONFIG_BAREUDP=m +CONFIG_AMT=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -480,6 +480,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set @@ -762,12 +763,14 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m +CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y @@ -792,9 +795,11 @@ CONFIG_HIST_TRIGGERS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m +CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m CONFIG_KUNIT=m CONFIG_KUNIT_DEBUGFS=y CONFIG_LKDTM=m +CONFIG_KPROBES_SANITY_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index aceccf3b9a88..eed3b9acfa71 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -65,9 +65,11 @@ CONFIG_ZFCP=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_ZLIB_DFLTCC is not set +CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y # CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y From d9847eb8be3d895b2b5f514fdf3885d47a0b92a2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 22 Nov 2021 20:17:40 +0530 Subject: [PATCH 125/253] bpf: Make CONFIG_DEBUG_INFO_BTF depend upon CONFIG_BPF_SYSCALL Vinicius Costa Gomes reported [0] that build fails when CONFIG_DEBUG_INFO_BTF is enabled and CONFIG_BPF_SYSCALL is disabled. This leads to btf.c not being compiled, and then no symbol being present in vmlinux for the declarations in btf.h. Since BTF is not useful without enabling BPF subsystem, disallow this combination. However, theoretically disabling both now could still fail, as the symbol for kfunc_btf_id_list variables is not available. This isn't a problem as the compiler usually optimizes the whole register/unregister call, but at lower optimization levels it can fail the build in linking stage. Fix that by adding dummy variables so that modules taking address of them still work, but the whole thing is a noop. [0]: https://lore.kernel.org/bpf/20211110205418.332403-1-vinicius.gomes@intel.com Fixes: 14f267d95fe4 ("bpf: btf: Introduce helpers for dynamic BTF set registration") Reported-by: Vinicius Costa Gomes Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211122144742.477787-2-memxor@gmail.com --- include/linux/btf.h | 14 ++++++++++---- kernel/bpf/btf.c | 9 ++------- lib/Kconfig.debug | 1 + 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index 203eef993d76..0e1b6281fd8f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -245,7 +245,10 @@ struct kfunc_btf_id_set { struct module *owner; }; -struct kfunc_btf_id_list; +struct kfunc_btf_id_list { + struct list_head list; + struct mutex mutex; +}; #ifdef CONFIG_DEBUG_INFO_BTF_MODULES void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, @@ -254,6 +257,9 @@ void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, struct kfunc_btf_id_set *s); bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, struct module *owner); + +extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; +extern struct kfunc_btf_id_list prog_test_kfunc_list; #else static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, struct kfunc_btf_id_set *s) @@ -268,13 +274,13 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, { return false; } + +static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; +static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; #endif #define DEFINE_KFUNC_BTF_ID_SET(set, name) \ struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ THIS_MODULE } -extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; -extern struct kfunc_btf_id_list prog_test_kfunc_list; - #endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index dbc3ad07e21b..ea3df9867cec 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6346,11 +6346,6 @@ BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct) /* BTF ID set registration API for modules */ -struct kfunc_btf_id_list { - struct list_head list; - struct mutex mutex; -}; - #ifdef CONFIG_DEBUG_INFO_BTF_MODULES void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, @@ -6389,8 +6384,6 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, return false; } -#endif - #define DEFINE_KFUNC_BTF_ID_LIST(name) \ struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ __MUTEX_INITIALIZER(name.mutex) }; \ @@ -6398,3 +6391,5 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); + +#endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ef7ce18b4f5..596bb5e4790c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -316,6 +316,7 @@ config DEBUG_INFO_BTF bool "Generate BTF typeinfo" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST + depends on BPF_SYSCALL help Generate deduplicated BTF type information from DWARF debug info. Turning this on expects presence of pahole tool, which will convert From b12f031043247b80999bf5e03b8cded3b0b40f8d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 22 Nov 2021 20:17:41 +0530 Subject: [PATCH 126/253] bpf: Fix bpf_check_mod_kfunc_call for built-in modules When module registering its set is built-in, THIS_MODULE will be NULL, hence we cannot return early in case owner is NULL. Fixes: 14f267d95fe4 ("bpf: btf: Introduce helpers for dynamic BTF set registration") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211122144742.477787-3-memxor@gmail.com --- kernel/bpf/btf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ea3df9867cec..9bdb03767db5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6371,8 +6371,6 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, { struct kfunc_btf_id_set *s; - if (!owner) - return false; mutex_lock(&klist->mutex); list_for_each_entry(s, &klist->list, list) { if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { From 3345193f6f3cc24791c245d4ba2c38502f1cf684 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 22 Nov 2021 20:17:42 +0530 Subject: [PATCH 127/253] tools/resolve_btfids: Skip unresolved symbol warning for empty BTF sets resolve_btfids prints a warning when it finds an unresolved symbol, (id == 0) in id_patch. This can be the case for BTF sets that are empty (due to disabled config options), hence printing warnings for certain builds, most recently seen in [0]. The reason behind this is because id->cnt aliases id->id in btf_id struct, leading to empty set showing up as ID 0 when we get to id_patch, which triggers the warning. Since sets are an exception here, accomodate by reusing hole in btf_id for bool is_set member, setting it to true for BTF set when setting id->cnt, and use that to skip extraneous warning. [0]: https://lore.kernel.org/all/1b99ae14-abb4-d18f-cc6a-d7e523b25542@gmail.com Before: ; ./tools/bpf/resolve_btfids/resolve_btfids -v -b vmlinux net/ipv4/tcp_cubic.ko adding symbol tcp_cubic_kfunc_ids WARN: resolve_btfids: unresolved symbol tcp_cubic_kfunc_ids patching addr 0: ID 0 [tcp_cubic_kfunc_ids] sorting addr 4: cnt 0 [tcp_cubic_kfunc_ids] update ok for net/ipv4/tcp_cubic.ko After: ; ./tools/bpf/resolve_btfids/resolve_btfids -v -b vmlinux net/ipv4/tcp_cubic.ko adding symbol tcp_cubic_kfunc_ids patching addr 0: ID 0 [tcp_cubic_kfunc_ids] sorting addr 4: cnt 0 [tcp_cubic_kfunc_ids] update ok for net/ipv4/tcp_cubic.ko Fixes: 0e32dfc80bae ("bpf: Enable TCP congestion control kfunc from modules") Reported-by: Pavel Skripkin Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211122144742.477787-4-memxor@gmail.com --- tools/bpf/resolve_btfids/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index a59cb0ee609c..73409e27be01 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -83,6 +83,7 @@ struct btf_id { int cnt; }; int addr_cnt; + bool is_set; Elf64_Addr addr[ADDR_CNT]; }; @@ -451,8 +452,10 @@ static int symbols_collect(struct object *obj) * in symbol's size, together with 'cnt' field hence * that - 1. */ - if (id) + if (id) { id->cnt = sym.st_size / sizeof(int) - 1; + id->is_set = true; + } } else { pr_err("FAILED unsupported prefix %s\n", prefix); return -1; @@ -568,9 +571,8 @@ static int id_patch(struct object *obj, struct btf_id *id) int *ptr = data->d_buf; int i; - if (!id->id) { + if (!id->id && !id->is_set) pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); - } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; From f6071e5e3961eeb5300bd0901c9e128598730ae3 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 30 Nov 2021 16:47:20 -0800 Subject: [PATCH 128/253] selftests/fib_tests: Rework fib_rp_filter_test() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently rp_filter tests in fib_tests.sh:fib_rp_filter_test() are failing. ping sockets are bound to dummy1 using the "-I" option (SO_BINDTODEVICE), but socket lookup is failing when receiving ping replies, since the routing table thinks they belong to dummy0. For example, suppose ping is using a SOCK_RAW socket for ICMP messages. When receiving ping replies, in __raw_v4_lookup(), sk->sk_bound_dev_if is 3 (dummy1), but dif (skb_rtable(skb)->rt_iif) says 2 (dummy0), so the raw_sk_bound_dev_eq() check fails. Similar things happen in ping_lookup() for SOCK_DGRAM sockets. These tests used to pass due to a bug [1] in iputils, where "ping -I" actually did not bind ICMP message sockets to device. The bug has been fixed by iputils commit f455fee41c07 ("ping: also bind the ICMP socket to the specific device") in 2016, which is why our rp_filter tests started to fail. See [2] . Fixing the tests while keeping everything in one netns turns out to be nontrivial. Rework the tests and build the following topology: ┌─────────────────────────────┐ ┌─────────────────────────────┐ │ network namespace 1 (ns1) │ │ network namespace 2 (ns2) │ │ │ │ │ │ ┌────┐ ┌─────┐ │ │ ┌─────┐ ┌────┐ │ │ │ lo │<───>│veth1│<────────┼────┼─>│veth2│<──────────>│ lo │ │ │ └────┘ ├─────┴──────┐ │ │ ├─────┴──────┐ └────┘ │ │ │192.0.2.1/24│ │ │ │192.0.2.1/24│ │ │ └────────────┘ │ │ └────────────┘ │ └─────────────────────────────┘ └─────────────────────────────┘ Consider sending an ICMP_ECHO packet A in ns2. Both source and destination IP addresses are 192.0.2.1, and we use strict mode rp_filter in both ns1 and ns2: 1. A is routed to lo since its destination IP address is one of ns2's local addresses (veth2); 2. A is redirected from lo's egress to veth2's egress using mirred; 3. A arrives at veth1's ingress in ns1; 4. A is redirected from veth1's ingress to lo's ingress, again, using mirred; 5. In __fib_validate_source(), fib_info_nh_uses_dev() returns false, since A was received on lo, but reverse path lookup says veth1; 6. However A is not dropped since we have relaxed this check for lo in commit 66f8209547cc ("fib: relax source validation check for loopback packets"); Making sure A is not dropped here in this corner case is the whole point of having this test. 7. As A reaches the ICMP layer, an ICMP_ECHOREPLY packet, B, is generated; 8. Similarly, B is redirected from lo's egress to veth1's egress (in ns1), then redirected once again from veth2's ingress to lo's ingress (in ns2), using mirred. Also test "ping 127.0.0.1" from ns2. It does not trigger the relaxed check in __fib_validate_source(), but just to make sure the topology works with loopback addresses. Tested with ping from iputils 20210722-41-gf9fb573: $ ./fib_tests.sh -t rp_filter IPv4 rp_filter tests TEST: rp_filter passes local packets [ OK ] TEST: rp_filter passes loopback packets [ OK ] [1] https://github.com/iputils/iputils/issues/55 [2] https://github.com/iputils/iputils/commit/f455fee41c077d4b700a473b2f5b3487b8febc1d Reported-by: Hangbin Liu Fixes: adb701d6cfa4 ("selftests: add a test case for rp_filter") Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Acked-by: David Ahern Link: https://lore.kernel.org/r/20211201004720.6357-1-yepeilin.cs@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 59 ++++++++++++++++++++---- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 5abe92d55b69..996af1ae3d3d 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -444,24 +444,63 @@ fib_rp_filter_test() setup set -e + ip netns add ns2 + ip netns set ns2 auto + + ip -netns ns2 link set dev lo up + + $IP link add name veth1 type veth peer name veth2 + $IP link set dev veth2 netns ns2 + $IP address add 192.0.2.1/24 dev veth1 + ip -netns ns2 address add 192.0.2.1/24 dev veth2 + $IP link set dev veth1 up + ip -netns ns2 link set dev veth2 up + $IP link set dev lo address 52:54:00:6a:c7:5e - $IP link set dummy0 address 52:54:00:6a:c7:5e - $IP link add dummy1 type dummy - $IP link set dummy1 address 52:54:00:6a:c7:5e - $IP link set dev dummy1 up + $IP link set dev veth1 address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + + # 1. (ns2) redirect lo's egress to veth2's egress + ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth2 + ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth2 + + # 2. (ns1) redirect veth1's ingress to lo's ingress + $NS_EXEC tc qdisc add dev veth1 ingress + $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \ + action mirred ingress redirect dev lo + $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \ + action mirred ingress redirect dev lo + + # 3. (ns1) redirect lo's egress to veth1's egress + $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel + $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth1 + $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth1 + + # 4. (ns2) redirect veth2's ingress to lo's ingress + ip netns exec ns2 tc qdisc add dev veth2 ingress + ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + action mirred ingress redirect dev lo + ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + action mirred ingress redirect dev lo + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - - $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel - $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo - $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup From 988f01683c7f2bf9f8fe2bae1cf4010fcd1baaf5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Dec 2021 21:45:34 +0100 Subject: [PATCH 129/253] objtool: Fix pv_ops noinstr validation Boris reported that in one of his randconfig builds, objtool got infinitely stuck. Turns out there's trivial list corruption in the pv_ops tracking when a function is both in a static table and in a code assignment. Avoid re-adding function to the pv_ops[] lists when they're already on it. Fixes: db2b0c5d7b6f ("objtool: Support pv_opsindirect calls for noinstr") Reported-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Tested-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211202204534.GA16608@worktop.programming.kicks-ass.net --- tools/objtool/elf.c | 1 + tools/objtool/objtool.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 81a4c543ff7e..4b384c907027 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); + INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; sym->idx = i; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index c90c7084e45a..bdf699f6552b 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) !strcmp(func->name, "_paravirt_ident_64")) return; + /* already added this function */ + if (!list_empty(&func->pv_target)) + return; + list_add(&func->pv_target, &f->pv_ops[idx].targets); f->pv_ops[idx].clean = false; } From 51523ed1c26758de1af7e58730a656875f72f783 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 2 Dec 2021 16:32:26 +0100 Subject: [PATCH 130/253] x86/64/mm: Map all kernel memory into trampoline_pgd The trampoline_pgd only maps the 0xfffffff000000000-0xffffffffffffffff range of kernel memory (with 4-level paging). This range contains the kernel's text+data+bss mappings and the module mapping space but not the direct mapping and the vmalloc area. This is enough to get the application processors out of real-mode, but for code that switches back to real-mode the trampoline_pgd is missing important parts of the address space. For example, consider this code from arch/x86/kernel/reboot.c, function machine_real_restart() for a 64-bit kernel: #ifdef CONFIG_X86_32 load_cr3(initial_page_table); #else write_cr3(real_mode_header->trampoline_pgd); /* Exiting long mode will fail if CR4.PCIDE is set. */ if (boot_cpu_has(X86_FEATURE_PCID)) cr4_clear_bits(X86_CR4_PCIDE); #endif /* Jump to the identity-mapped low memory code */ #ifdef CONFIG_X86_32 asm volatile("jmpl *%0" : : "rm" (real_mode_header->machine_real_restart_asm), "a" (type)); #else asm volatile("ljmpl *%0" : : "m" (real_mode_header->machine_real_restart_asm), "D" (type)); #endif The code switches to the trampoline_pgd, which unmaps the direct mapping and also the kernel stack. The call to cr4_clear_bits() will find no stack and crash the machine. The real_mode_header pointer below points into the direct mapping, and dereferencing it also causes a crash. The reason this does not crash always is only that kernel mappings are global and the CR3 switch does not flush those mappings. But if theses mappings are not in the TLB already, the above code will crash before it can jump to the real-mode stub. Extend the trampoline_pgd to contain all kernel mappings to prevent these crashes and to make code which runs on this page-table more robust. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20211202153226.22946-5-joro@8bytes.org --- arch/x86/realmode/init.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 4a3da7592b99..38d24d2ab38b 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -72,6 +72,7 @@ static void __init setup_real_mode(void) #ifdef CONFIG_X86_64 u64 *trampoline_pgd; u64 efer; + int i; #endif base = (unsigned char *)real_mode_header; @@ -128,8 +129,17 @@ static void __init setup_real_mode(void) trampoline_header->flags = 0; trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); + + /* Map the real mode stub as virtual == physical */ trampoline_pgd[0] = trampoline_pgd_entry.pgd; - trampoline_pgd[511] = init_top_pgt[511].pgd; + + /* + * Include the entirety of the kernel mapping into the trampoline + * PGD. This way, all mappings present in the normal kernel page + * tables are usable while running on trampoline_pgd. + */ + for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++) + trampoline_pgd[i] = init_top_pgt[i].pgd; #endif sme_sev_setup_real_mode(trampoline_header); From d080811f27936f712f619f847389f403ac873b8f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Dec 2021 08:59:27 +0100 Subject: [PATCH 131/253] HID: add USB_HID dependancy to hid-chicony The chicony HID driver only controls USB devices, yet did not have a dependancy on USB_HID. This causes build errors on some configurations like sparc when building due to new changes to the chicony driver. Reported-by: Stephen Rothwell Cc: stable@vger.kernel.org Cc: Jiri Kosina Cc: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211203075927.2829218-1-gregkh@linuxfoundation.org --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 828c2995ec34..e9bc8efed5a1 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -207,7 +207,7 @@ config HID_CHERRY config HID_CHICONY tristate "Chicony devices" - depends on HID + depends on USB_HID default !EXPERT help Support for Chicony Tactical pad and special keys on Chicony keyboards. From 30cb3c2ad24b66fb7639a6d1f4390c74d6e68f94 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Dec 2021 09:12:31 +0100 Subject: [PATCH 132/253] HID: add USB_HID dependancy to hid-prodikeys The prodikeys HID driver only controls USB devices, yet did not have a dependancy on USB_HID. This causes build errors on some configurations like nios2 when building due to new changes to the prodikeys driver. Reported-by: kernel test robot Cc: stable@vger.kernel.org Cc: Jiri Kosina Cc: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211203081231.2856936-1-gregkh@linuxfoundation.org --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index e9bc8efed5a1..a7c78ac96270 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -245,7 +245,7 @@ config HID_MACALLY config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" - depends on HID && SND + depends on USB_HID && SND select SND_RAWMIDI help Support for Prodikeys PC-MIDI Keyboard device support. From caff009098e6cf59fd6ac21c3a3befcc854978b4 Mon Sep 17 00:00:00 2001 From: xiazhengqiao Date: Fri, 3 Dec 2021 11:01:19 +0800 Subject: [PATCH 133/253] HID: google: add eel USB id Add one additional hammer-like device. Signed-off-by: xiazhengqiao Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211203030119.28612-1-xiazhengqiao@huaqin.corp-partner.google.com --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 8123b871a3eb..0403beb3104b 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -585,6 +585,8 @@ static void hammer_remove(struct hid_device *hdev) static const struct hid_device_id hammer_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_EEL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index aeb907b57ab3..ca418bffd3b2 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -501,6 +501,7 @@ #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_DEVICE_ID_GOOGLE_DON 0x5050 +#define USB_DEVICE_ID_GOOGLE_EEL 0x5057 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From 086e81f6b90e41a07a1a885bb11e93daa6915747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 30 Nov 2021 07:01:17 +0100 Subject: [PATCH 134/253] HID: intel-ish-hid: ipc: only enable IRQ wakeup when requested MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes spurious wakeups from s0ix on Lenovo ThinkPad X1 Cargon Gen 9 on lid close. These wakeups are generated by interrupts from the ISH on changes to the lid status. By disabling the wake IRQ from the ISH we inhibit these spurious wakeups while keeping the resume from LID open through the ACPI interrupt. Reports on the Lenovo forums indicate that Lenovo ThinkPad X1 Yoga Gen6 is also affected. Fixes: ae02e5d40d5f ("HID: intel-ish-hid: ipc layer") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214855 Signed-off-by: Thomas Weißschuh Acked-by: Srinivas Pandruvada Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211130060117.3026-1-linux@weissschuh.net --- drivers/hid/intel-ish-hid/ipc/pci-ish.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 1c5039081db2..8e9d9450cb83 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -266,7 +266,8 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work) if (ish_should_leave_d0i3(pdev) && !dev->suspend_flag && IPC_IS_ISH_ILUP(fwsts)) { - disable_irq_wake(pdev->irq); + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(pdev->irq); ish_set_host_ready(dev); @@ -337,7 +338,8 @@ static int __maybe_unused ish_suspend(struct device *device) */ pci_save_state(pdev); - enable_irq_wake(pdev->irq); + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(pdev->irq); } } else { /* From 96f3896780153214040a6747974bebc1355307c0 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:53:21 +0800 Subject: [PATCH 135/253] selftests/tc-testing: add exit code Mark the summary result as FAIL to prevent from confusing the selftest framework if some of them are failed. Previously, the selftest framework always treats it as *ok* even though some of them are failed actually. That's because the script tdc.sh always return 0. # All test results: # # 1..97 # ok 1 83be - Create FQ-PIE with invalid number of flows # ok 2 8b6e - Create RED with no flags [...snip] # ok 6 5f15 - Create RED with flags ECN, harddrop # ok 7 53e8 - Create RED with flags ECN, nodrop # ok 8 d091 - Fail to create RED with only nodrop flag # ok 9 af8e - Create RED with flags ECN, nodrop, harddrop # not ok 10 ce7d - Add mq Qdisc to multi-queue device (4 queues) # Could not match regex pattern. Verify command output: # qdisc mq 1: root # qdisc fq_codel 0: parent 1:4 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 # qdisc fq_codel 0: parent 1:3 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 [...snip] # ok 96 6979 - Change quantum of a strict ETS band # ok 97 9a7d - Change ETS strict band without quantum # # # # ok 1 selftests: tc-testing: tdc.sh <<< summary result CC: Philip Li Reported-by: kernel test robot Signed-off-by: Li Zhijian Acked-by: Davide Caratti Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a3e43189d940..ee22e3447ec7 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -716,6 +716,7 @@ def set_operation_mode(pm, parser, args, remaining): list_test_cases(alltests) exit(0) + exit_code = 0 # KSFT_PASS if len(alltests): req_plugins = pm.get_required_plugins(alltests) try: @@ -724,6 +725,8 @@ def set_operation_mode(pm, parser, args, remaining): print('The following plugins were not found:') print('{}'.format(pde.missing_pg)) catresults = test_runner(pm, args, alltests) + if catresults.count_failures() != 0: + exit_code = 1 # KSFT_FAIL if args.format == 'none': print('Test results output suppression requested\n') else: @@ -748,6 +751,8 @@ def set_operation_mode(pm, parser, args, remaining): gid=int(os.getenv('SUDO_GID'))) else: print('No tests found\n') + exit_code = 4 # KSFT_SKIP + exit(exit_code) def main(): """ @@ -767,8 +772,5 @@ def main(): set_operation_mode(pm, parser, args, remaining) - exit(0) - - if __name__ == "__main__": main() From a8c9505c53c5f1f0aba572a4c70e2d91ad08434e Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:53:22 +0800 Subject: [PATCH 136/253] selftests/tc-testing: add missing config qdiscs/fq_pie requires CONFIG_NET_SCH_FQ_PIE, otherwise tc will fail to create a fq_pie qdisc. It fixes following issue: # not ok 57 83be - Create FQ-PIE with invalid number of flows # Command exited with 2, expected 0 # Error: Specified qdisc not found. Signed-off-by: Li Zhijian Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index b71828df5a6d..b1cd7efa4512 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -60,6 +60,7 @@ CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_FQ_PIE=m # ## Network testing From db925bca33a9f0029a9891defd926a4856dd5c87 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:53:23 +0800 Subject: [PATCH 137/253] selftests/tc-testing: Fix cannot create /sys/bus/netdevsim/new_device: Directory nonexistent Install netdevsim to provide /sys/bus/netdevsim/new_device interface. It helps to fix: # ok 97 9a7d - Change ETS strict band without quantum # skipped - skipped - previous setup failed 11 ce7d # # # -----> prepare stage *** Could not execute: "echo "1 1 4" > /sys/bus/netdevsim/new_device" # # -----> prepare stage *** Error message: "/bin/sh: 1: cannot create /sys/bus/netdevsim/new_device: Directory nonexistent # " # # -----> prepare stage *** Aborting test run. # # # <_io.BufferedReader name=5> *** stdout *** # Signed-off-by: Li Zhijian Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/config | 1 + tools/testing/selftests/tc-testing/tdc.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index b1cd7efa4512..a3239d5e40c7 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -61,6 +61,7 @@ CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_FQ_PIE=m +CONFIG_NETDEVSIM=m # ## Network testing diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 7fe38c76db44..afb0cd86fa3d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -1,5 +1,6 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +modprobe netdevsim ./tdc.py -c actions --nobuildebpf ./tdc.py -c qdisc From 09f736aa95476631227d2dc0e6b9aeee1ad7ed58 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 26 Nov 2021 14:23:40 +0200 Subject: [PATCH 138/253] xhci: Fix commad ring abort, write all 64 bits to CRCR register. Turns out some xHC controllers require all 64 bits in the CRCR register to be written to execute a command abort. The lower 32 bits containing the command abort bit is written first. In case the command ring stops before we write the upper 32 bits then hardware may use these upper bits to set the commnd ring dequeue pointer. Solve this by making sure the upper 32 bits contain a valid command ring dequeue pointer. The original patch that only wrote the first 32 to stop the ring went to stable, so this fix should go there as well. Fixes: ff0e50d3564f ("xhci: Fix command ring pointer corruption while aborting a command") Cc: stable@vger.kernel.org Tested-by: Pavankumar Kondeti Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20211126122340.1193239-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 311597bba80e..eaa49aef2935 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -366,7 +366,9 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, /* Must be called with xhci->lock held, releases and aquires lock back */ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) { - u32 temp_32; + struct xhci_segment *new_seg = xhci->cmd_ring->deq_seg; + union xhci_trb *new_deq = xhci->cmd_ring->dequeue; + u64 crcr; int ret; xhci_dbg(xhci, "Abort command ring\n"); @@ -375,13 +377,18 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) /* * The control bits like command stop, abort are located in lower - * dword of the command ring control register. Limit the write - * to the lower dword to avoid corrupting the command ring pointer - * in case if the command ring is stopped by the time upper dword - * is written. + * dword of the command ring control register. + * Some controllers require all 64 bits to be written to abort the ring. + * Make sure the upper dword is valid, pointing to the next command, + * avoiding corrupting the command ring pointer in case the command ring + * is stopped by the time the upper dword is written. */ - temp_32 = readl(&xhci->op_regs->cmd_ring); - writel(temp_32 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); + next_trb(xhci, NULL, &new_seg, &new_deq); + if (trb_is_link(new_deq)) + next_trb(xhci, NULL, &new_seg, &new_deq); + + crcr = xhci_trb_virt_to_dma(new_seg, new_deq); + xhci_write_64(xhci, crcr | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the * completion of the Command Abort operation. If CRR is not negated in 5 From d2a004037c3c6afd36d40c384d2905f47cd51c57 Mon Sep 17 00:00:00 2001 From: Ole Ernst Date: Sat, 27 Nov 2021 10:05:45 +0100 Subject: [PATCH 139/253] USB: NO_LPM quirk Lenovo Powered USB-C Travel Hub This is another branded 8153 device that doesn't work well with LPM: r8152 2-2.1:1.0 enp0s13f0u2u1: Stop submitting intr, status -71 Disable LPM to resolve the issue. Signed-off-by: Ole Ernst Cc: stable Link: https://lore.kernel.org/r/20211127090546.52072-1-olebowle@gmx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8239fe7129dd..019351c0b52c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -434,6 +434,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ + { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ { USB_DEVICE(0x17ef, 0xa012), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, From fbcd13df1e78eb2ba83a3c160eefe2d6f574beaf Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 29 Nov 2021 16:18:25 -0800 Subject: [PATCH 140/253] usb: typec: tcpm: Wait in SNK_DEBOUNCED until disconnect Stub from the spec: "4.5.2.2.4.2 Exiting from AttachWait.SNK State A Sink shall transition to Unattached.SNK when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce. A DRP shall transition to Unattached.SRC when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce." This change makes TCPM to wait in SNK_DEBOUNCED state until CC1 and CC2 pins is SNK.Open for at least tPDDebounce. Previously, TCPM resets the port if vbus is not present in PD_T_PS_SOURCE_ON. This causes TCPM to loop continuously when connected to a faulty power source that does not present vbus. Waiting in SNK_DEBOUNCED also ensures that TCPM is adherant to "4.5.2.2.4.2 Exiting from AttachWait.SNK State" requirements. [ 6169.280751] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 6169.280759] state change TOGGLING -> SNK_ATTACH_WAIT [rev2 NONE_AMS] [ 6169.280771] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev2 NONE_AMS] [ 6169.282427] CC1: 0 -> 0, CC2: 5 -> 5 [state SNK_ATTACH_WAIT, polarity 0, connected] [ 6169.450825] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed 170 ms] [ 6169.450834] pending state change SNK_DEBOUNCED -> PORT_RESET @ 480 ms [rev2 NONE_AMS] [ 6169.930892] state change SNK_DEBOUNCED -> PORT_RESET [delayed 480 ms] [ 6169.931296] disable vbus discharge ret:0 [ 6169.931301] Setting usb_comm capable false [ 6169.932783] Setting voltage/current limit 0 mV 0 mA [ 6169.932802] polarity 0 [ 6169.933706] Requesting mux state 0, usb-role 0, orientation 0 [ 6169.936689] cc:=0 [ 6169.936812] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev2 NONE_AMS] [ 6169.937157] CC1: 0 -> 0, CC2: 5 -> 0 [state PORT_RESET, polarity 0, disconnected] [ 6170.036880] state change PORT_RESET -> PORT_RESET_WAIT_OFF [delayed 100 ms] [ 6170.036890] state change PORT_RESET_WAIT_OFF -> SNK_UNATTACHED [rev2 NONE_AMS] [ 6170.036896] Start toggling [ 6170.041412] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 6170.042973] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 6170.042976] state change TOGGLING -> SNK_ATTACH_WAIT [rev2 NONE_AMS] [ 6170.042981] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev2 NONE_AMS] [ 6170.213014] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed 170 ms] [ 6170.213019] pending state change SNK_DEBOUNCED -> PORT_RESET @ 480 ms [rev2 NONE_AMS] [ 6170.693068] state change SNK_DEBOUNCED -> PORT_RESET [delayed 480 ms] [ 6170.693304] disable vbus discharge ret:0 [ 6170.693308] Setting usb_comm capable false [ 6170.695193] Setting voltage/current limit 0 mV 0 mA [ 6170.695210] polarity 0 [ 6170.695990] Requesting mux state 0, usb-role 0, orientation 0 [ 6170.701896] cc:=0 [ 6170.702181] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev2 NONE_AMS] [ 6170.703343] CC1: 0 -> 0, CC2: 5 -> 0 [state PORT_RESET, polarity 0, disconnected] Fixes: f0690a25a140b8 ("staging: typec: USB Type-C Port Manager (tcpm)") Cc: stable@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20211130001825.3142830-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 7f2f3ff1b391..6010b9901126 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4110,11 +4110,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_try_src(port) ? SRC_TRY : SNK_ATTACHED, 0); - else - /* Wait for VBUS, but not forever */ - tcpm_set_state(port, PORT_RESET, PD_T_PS_SOURCE_ON); break; - case SRC_TRY: port->try_src_count++; tcpm_set_cc(port, tcpm_rp_cc(port)); From 387c2b6ba197c6df28e75359f7d892f7c8dec204 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 30 Nov 2021 09:42:39 -0600 Subject: [PATCH 141/253] usb: cdns3: gadget: fix new urb never complete if ep cancel previous requests This issue was found at android12 MTP. 1. MTP submit many out urb request. 2. Cancel left requests (>20) when enough data get from host 3. Send ACK by IN endpoint. 4. MTP submit new out urb request. 5. 4's urb never complete. TRACE LOG: MtpServer-2157 [000] d..3 1287.150391: cdns3_ep_dequeue: ep1out: req: 00000000299e6836, req buff 000000009df42287, length: 0/16384 zsi, status: -115, trb: [start:87, end:87: virt addr 0x80004000ffd50420], flags:1 SID: 0 MtpServer-2157 [000] d..3 1287.150410: cdns3_gadget_giveback: ep1out: req: 00000000299e6836, req buff 000000009df42287, length: 0/16384 zsi, status: -104, trb: [start:87, end:87: virt addr 0x80004000ffd50420], flags:0 SID: 0 MtpServer-2157 [000] d..3 1287.150433: cdns3_ep_dequeue: ep1out: req: 0000000080b7bde6, req buff 000000009ed5c556, length: 0/16384 zsi, status: -115, trb: [start:88, end:88: virt addr 0x80004000ffd5042c], flags:1 SID: 0 MtpServer-2157 [000] d..3 1287.150446: cdns3_gadget_giveback: ep1out: req: 0000000080b7bde6, req buff 000000009ed5c556, length: 0/16384 zsi, status: -104, trb: [start:88, end:88: virt addr 0x80004000ffd5042c], flags:0 SID: 0 .... MtpServer-2157 [000] d..1 1293.630410: cdns3_alloc_request: ep1out: req: 00000000afbccb7d, req buff 0000000000000000, length: 0/0 zsi, status: 0, trb: [start:0, end:0: virt addr (null)], flags:0 SID: 0 MtpServer-2157 [000] d..2 1293.630421: cdns3_ep_queue: ep1out: req: 00000000afbccb7d, req buff 00000000871caf90, length: 0/512 zsi, status: -115, trb: [start:0, end:0: virt addr (null)], flags:0 SID: 0 MtpServer-2157 [000] d..2 1293.630445: cdns3_wa1: WA1: ep1out set guard MtpServer-2157 [000] d..2 1293.630450: cdns3_wa1: WA1: ep1out restore cycle bit MtpServer-2157 [000] d..2 1293.630453: cdns3_prepare_trb: ep1out: trb 000000007317b3ee, dma buf: 0xffd5bc00, size: 512, burst: 128 ctrl: 0x00000424 (C=0, T=0, ISP, IOC, Normal) SID:0 LAST_SID:0 MtpServer-2157 [000] d..2 1293.630460: cdns3_doorbell_epx: ep1out, ep_trbaddr ffd50414 .... irq/241-5b13000-2154 [000] d..1 1293.680849: cdns3_epx_irq: IRQ for ep1out: 01000408 ISP , ep_traddr: ffd508ac ep_last_sid: 00000000 use_streams: 0 irq/241-5b13000-2154 [000] d..1 1293.680858: cdns3_complete_trb: ep1out: trb 0000000021a11b54, dma buf: 0xffd50420, size: 16384, burst: 128 ctrl: 0x00001810 (C=0, T=0, CHAIN, LINK) SID:0 LAST_SID:0 irq/241-5b13000-2154 [000] d..1 1293.680865: cdns3_request_handled: Req: 00000000afbccb7d not handled, DMA pos: 185, ep deq: 88, ep enq: 185, start trb: 184, end trb: 184 Actually DMA pos already bigger than previous submit request afbccb7d's TRB (184-184). The reason of (not handled) is that deq position is wrong. The TRB link is below when irq happen. DEQ LINK LINK LINK LINK LINK .... TRB(afbccb7d):START DMA(EP_TRADDR). Original code check LINK TRB, but DEQ just move one step. LINK DEQ LINK LINK LINK LINK .... TRB(afbccb7d):START DMA(EP_TRADDR). This patch skip all LINK TRB and sync DEQ to trb's start. LINK LINK LINK LINK LINK .... DEQ = TRB(afbccb7d):START DMA(EP_TRADDR). Acked-by: Peter Chen Cc: stable Signed-off-by: Frank Li Signed-off-by: Jun Li Link: https://lore.kernel.org/r/20211130154239.8029-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 1f3b4a142212..f9af7ebe003d 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -337,19 +337,6 @@ static void cdns3_ep_inc_deq(struct cdns3_endpoint *priv_ep) cdns3_ep_inc_trb(&priv_ep->dequeue, &priv_ep->ccs, priv_ep->num_trbs); } -static void cdns3_move_deq_to_next_trb(struct cdns3_request *priv_req) -{ - struct cdns3_endpoint *priv_ep = priv_req->priv_ep; - int current_trb = priv_req->start_trb; - - while (current_trb != priv_req->end_trb) { - cdns3_ep_inc_deq(priv_ep); - current_trb = priv_ep->dequeue; - } - - cdns3_ep_inc_deq(priv_ep); -} - /** * cdns3_allow_enable_l1 - enable/disable permits to transition to L1. * @priv_dev: Extended gadget object @@ -1517,10 +1504,11 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev, trb = priv_ep->trb_pool + priv_ep->dequeue; - /* Request was dequeued and TRB was changed to TRB_LINK. */ - if (TRB_FIELD_TO_TYPE(le32_to_cpu(trb->control)) == TRB_LINK) { + /* The TRB was changed as link TRB, and the request was handled at ep_dequeue */ + while (TRB_FIELD_TO_TYPE(le32_to_cpu(trb->control)) == TRB_LINK) { trace_cdns3_complete_trb(priv_ep, trb); - cdns3_move_deq_to_next_trb(priv_req); + cdns3_ep_inc_deq(priv_ep); + trb = priv_ep->trb_pool + priv_ep->dequeue; } if (!request->stream_id) { From 37307f7020ab38dde0892a578249bf63d00bca64 Mon Sep 17 00:00:00 2001 From: Zhou Qingyang Date: Wed, 1 Dec 2021 01:27:00 +0800 Subject: [PATCH 142/253] usb: cdnsp: Fix a NULL pointer dereference in cdnsp_endpoint_init() In cdnsp_endpoint_init(), cdnsp_ring_alloc() is assigned to pep->ring and there is a dereference of it in cdnsp_endpoint_init(), which could lead to a NULL pointer dereference on failure of cdnsp_ring_alloc(). Fix this bug by adding a check of pep->ring. This bug was found by a static analyzer. The analysis employs differential checking to identify inconsistent security operations (e.g., checks or kfrees) between two code paths and confirms that the inconsistent operations are not recovered in the current function or the callers, so they constitute bugs. Note that, as a bug found by static analysis, it can be a false positive or hard to trigger. Multiple researchers have cross-reviewed the bug. Builds with CONFIG_USB_CDNSP_GADGET=y show no new warnings, and our static analyzer no longer warns about this code. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Acked-by: Pawel Laszczak Acked-by: Peter Chen Signed-off-by: Zhou Qingyang Link: https://lore.kernel.org/r/20211130172700.206650-1-zhou1615@umn.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-mem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-mem.c b/drivers/usb/cdns3/cdnsp-mem.c index ad9aee3f1e39..97866bfb2da9 100644 --- a/drivers/usb/cdns3/cdnsp-mem.c +++ b/drivers/usb/cdns3/cdnsp-mem.c @@ -987,6 +987,9 @@ int cdnsp_endpoint_init(struct cdnsp_device *pdev, /* Set up the endpoint ring. */ pep->ring = cdnsp_ring_alloc(pdev, 2, ring_type, max_packet, mem_flags); + if (!pep->ring) + return -ENOMEM; + pep->skip = false; /* Fill the endpoint context */ From 9cabe26e65a893afd5846908aa393bd283ab6609 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Wed, 1 Dec 2021 15:14:02 -0500 Subject: [PATCH 143/253] serial: 8250_bcm7271: UART errors after resuming from S2 There is a small window in time during resume where the hardware flow control signal RTS can be asserted (which allows a sender to resume sending data to the UART) but the baud rate has not yet been restored. This will cause corrupted data and FRAMING, OVERRUN and BREAK errors. This is happening because the MCTRL register is shadowed in uart_port struct and is later used during resume to set the MCTRL register during both serial8250_do_startup() and uart_resume_port(). Unfortunately, serial8250_do_startup() happens before the UART baud rate is restored. The fix is to clear the shadowed mctrl value at the end of suspend and restore it at the end of resume. Fixes: 41a469482de2 ("serial: 8250: Add new 8250-core based Broadcom STB driver") Acked-by: Florian Fainelli Signed-off-by: Al Cooper Link: https://lore.kernel.org/r/20211201201402.47446-1-alcooperx@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_bcm7271.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c index 7f656fac503f..5163d60756b7 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -237,6 +237,7 @@ struct brcmuart_priv { u32 rx_err; u32 rx_timeout; u32 rx_abort; + u32 saved_mctrl; }; static struct dentry *brcmuart_debugfs_root; @@ -1133,16 +1134,27 @@ static int brcmuart_remove(struct platform_device *pdev) static int __maybe_unused brcmuart_suspend(struct device *dev) { struct brcmuart_priv *priv = dev_get_drvdata(dev); + struct uart_8250_port *up = serial8250_get_port(priv->line); + struct uart_port *port = &up->port; serial8250_suspend_port(priv->line); clk_disable_unprepare(priv->baud_mux_clk); + /* + * This will prevent resume from enabling RTS before the + * baud rate has been resored. + */ + priv->saved_mctrl = port->mctrl; + port->mctrl = 0; + return 0; } static int __maybe_unused brcmuart_resume(struct device *dev) { struct brcmuart_priv *priv = dev_get_drvdata(dev); + struct uart_8250_port *up = serial8250_get_port(priv->line); + struct uart_port *port = &up->port; int ret; ret = clk_prepare_enable(priv->baud_mux_clk); @@ -1165,6 +1177,7 @@ static int __maybe_unused brcmuart_resume(struct device *dev) start_rx_dma(serial8250_get_port(priv->line)); } serial8250_resume_port(priv->line); + port->mctrl = priv->saved_mctrl; return 0; } From a226abcd5d427fe9d42efc442818a4a1821e2664 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 2 Dec 2021 19:40:15 -0700 Subject: [PATCH 144/253] io-wq: don't retry task_work creation failure on fatal conditions We don't want to be retrying task_work creation failure if there's an actual signal pending for the parent task. If we do, then we can enter an infinite loop of perpetually retrying and each retry failing with -ERESTARTNOINTR because a signal is pending. Fixes: 3146cba99aa2 ("io-wq: make worker creation resilient against signals") Reported-by: Florian Fischer Link: https://lore.kernel.org/io-uring/20211202165606.mqryio4yzubl7ms5@pasture/ Tested-by: Florian Fischer Signed-off-by: Jens Axboe --- fs/io-wq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 88202de519f6..50cf9f92da36 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -714,6 +714,13 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data) static inline bool io_should_retry_thread(long err) { + /* + * Prevent perpetual task_work retry, if the task (or its group) is + * exiting. + */ + if (fatal_signal_pending(current)) + return false; + switch (err) { case -EAGAIN: case -ERESTARTSYS: From 2e69e18aec4c1d308b2da461cb6d21500fa441c7 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Tue, 9 Nov 2021 21:47:58 +0800 Subject: [PATCH 145/253] mtd: rawnand: denali: Add the dependency on HAS_IOMEM The helper function devm_platform_ioremap_resource_xxx() needs HAS_IOMEM enabled, so add the dependency on HAS_IOMEM. Fixes: 5f14a8ca1b49 ("mtd: rawnand: denali: Make use of the helper function devm_platform_ioremap_resource_byname()") Signed-off-by: Cai Huoqing Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211109134758.417-1-caihuoqing@baidu.com --- drivers/mtd/nand/raw/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index 67b7cb67c030..0a45d3c6c15b 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -26,7 +26,7 @@ config MTD_NAND_DENALI_PCI config MTD_NAND_DENALI_DT tristate "Denali NAND controller as a DT device" select MTD_NAND_DENALI - depends on HAS_DMA && HAVE_CLK && OF + depends on HAS_DMA && HAVE_CLK && OF && HAS_IOMEM help Enable the driver for NAND flash on platforms using a Denali NAND controller as a DT device. From 16d8b628a4152e8e8b01b6a1d82e30208ee2dd30 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:13 +0100 Subject: [PATCH 146/253] mtd: rawnand: Fix nand_erase_op delay NAND_OP_CMD() expects a delay parameter in nanoseconds. The delay value is wrongly given in milliseconds. Fix the conversion macro used in order to set this delay in nanoseconds. Fixes: d7a773e8812b ("mtd: rawnand: Access SDR and NV-DDR timings through a common macro") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-2-herve.codina@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 3d6c6e880520..5c6b065837ef 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -1837,7 +1837,7 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock) NAND_OP_CMD(NAND_CMD_ERASE1, 0), NAND_OP_ADDR(2, addrs, 0), NAND_OP_CMD(NAND_CMD_ERASE2, - NAND_COMMON_TIMING_MS(conf, tWB_max)), + NAND_COMMON_TIMING_NS(conf, tWB_max)), NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tBERS_max), 0), }; From 36a65982a98c4bc72fdcfef2c4aaf90193746631 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:14 +0100 Subject: [PATCH 147/253] mtd: rawnand: Fix nand_choose_best_timings() on unsupported interface When the NV-DDR interface is not supported by the NAND chip, the value of onfi->nvddr_timing_modes is 0. In this case, the best_mode variable value in nand_choose_best_nvddr_timings() is -1. The last for-loop is skipped and the function returns an uninitialized value. If this returned value is 0, the nand_choose_best_sdr_timings() is not executed and no 'best timing' are set. This leads the host controller and the NAND chip working at default mode 0 timing even if a better timing can be used. Fix this uninitialized returned value. nand_choose_best_sdr_timings() is pretty similar to nand_choose_best_nvddr_timings(). Even if onfi->sdr_timing_modes should never be seen as 0, nand_choose_best_sdr_timings() returned value is fixed. Fixes: a9ecc8c814e9 ("mtd: rawnand: Choose the best timings, NV-DDR included") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-3-herve.codina@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 5c6b065837ef..a130320de412 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -926,7 +926,7 @@ int nand_choose_best_sdr_timings(struct nand_chip *chip, struct nand_sdr_timings *spec_timings) { const struct nand_controller_ops *ops = chip->controller->ops; - int best_mode = 0, mode, ret; + int best_mode = 0, mode, ret = -EOPNOTSUPP; iface->type = NAND_SDR_IFACE; @@ -977,7 +977,7 @@ int nand_choose_best_nvddr_timings(struct nand_chip *chip, struct nand_nvddr_timings *spec_timings) { const struct nand_controller_ops *ops = chip->controller->ops; - int best_mode = 0, mode, ret; + int best_mode = 0, mode, ret = -EOPNOTSUPP; iface->type = NAND_NVDDR_IFACE; From a4ca0c439f2d5ce9a3dc118d882f9f03449864c8 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:15 +0100 Subject: [PATCH 148/253] mtd: rawnand: fsmc: Take instruction delay into account The FSMC NAND controller should apply a delay after the instruction has been issued on the bus. The FSMC NAND controller driver did not handle this delay. Add this waiting delay in the FSMC NAND controller driver. Fixes: 4da712e70294 ("mtd: nand: fsmc: use ->exec_op()") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-4-herve.codina@bootlin.com --- drivers/mtd/nand/raw/fsmc_nand.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 658f0cbe7ce8..0a6c9ef0ea8b 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -664,6 +665,9 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op, instr->ctx.waitrdy.timeout_ms); break; } + + if (instr->delay_ns) + ndelay(instr->delay_ns); } return ret; From 9472335eaa1452b51dc8e8edaa1a342997cb80c7 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:16 +0100 Subject: [PATCH 149/253] mtd: rawnand: fsmc: Fix timing computation Under certain circumstances, the timing settings calculated by the FSMC NAND controller driver were inaccurate. These settings led to incorrect data reads or fallback to timing mode 0 depending on the NAND chip used. The timing computation did not take into account the following constraint given in SPEAr3xx reference manual: twait >= tCEA - (tset * TCLK) + TOUTDEL + TINDEL Enhance the timings calculation by taking into account this additional constraint. This change has no impact on slow timing modes such as mode 0. Indeed, on mode 0, computed values are the same with and without the patch. NANDs which previously stayed in mode 0 because of fallback to mode 0 can now work at higher speeds and NANDs which were not working at all because of the corrupted data work at high speeds without troubles. Overall improvement on a Micron/MT29F1G08 (flash_speed tool): mode0 mode3 eraseblock write speed 3220 KiB/s 4511 KiB/s eraseblock read speed 4491 KiB/s 7529 KiB/s Fixes: d9fb079571833 ("mtd: nand: fsmc: add support for SDR timings") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-5-herve.codina@bootlin.com --- drivers/mtd/nand/raw/fsmc_nand.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 0a6c9ef0ea8b..6b2bda815b88 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -94,6 +94,14 @@ #define FSMC_BUSY_WAIT_TIMEOUT (1 * HZ) +/* + * According to SPEAr300 Reference Manual (RM0082) + * TOUDEL = 7ns (Output delay from the flip-flops to the board) + * TINDEL = 5ns (Input delay from the board to the flipflop) + */ +#define TOUTDEL 7000 +#define TINDEL 5000 + struct fsmc_nand_timings { u8 tclr; u8 tar; @@ -278,7 +286,7 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, { unsigned long hclk = clk_get_rate(host->clk); unsigned long hclkn = NSEC_PER_SEC / hclk; - u32 thiz, thold, twait, tset; + u32 thiz, thold, twait, tset, twait_min; if (sdrt->tRC_min < 30000) return -EOPNOTSUPP; @@ -310,13 +318,6 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->thold > FSMC_THOLD_MASK) tims->thold = FSMC_THOLD_MASK; - twait = max(sdrt->tRP_min, sdrt->tWP_min); - tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; - if (tims->twait == 0) - tims->twait = 1; - else if (tims->twait > FSMC_TWAIT_MASK) - tims->twait = FSMC_TWAIT_MASK; - tset = max(sdrt->tCS_min - sdrt->tWP_min, sdrt->tCEA_max - sdrt->tREA_max); tims->tset = DIV_ROUND_UP(tset / 1000, hclkn) - 1; @@ -325,6 +326,21 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->tset > FSMC_TSET_MASK) tims->tset = FSMC_TSET_MASK; + /* + * According to SPEAr300 Reference Manual (RM0082) which gives more + * information related to FSMSC timings than the SPEAr600 one (RM0305), + * twait >= tCEA - (tset * TCLK) + TOUTDEL + TINDEL + */ + twait_min = sdrt->tCEA_max - ((tims->tset + 1) * hclkn * 1000) + + TOUTDEL + TINDEL; + twait = max3(sdrt->tRP_min, sdrt->tWP_min, twait_min); + + tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; + if (tims->twait == 0) + tims->twait = 1; + else if (tims->twait > FSMC_TWAIT_MASK) + tims->twait = FSMC_TWAIT_MASK; + return 0; } From 27a030e8729255b2068f35c1cd609b532b263311 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 30 Nov 2021 11:24:43 +0000 Subject: [PATCH 150/253] mtd: dataflash: Add device-tree SPI IDs Commit 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible") added a test to check that every SPI driver has a spi_device_id for each DT compatiable string defined by the driver and warns if the spi_device_id is missing. The spi_device_ids are missing for the dataflash driver and the following warnings are now seen. WARNING KERN SPI driver mtd_dataflash has no spi_device_id for atmel,at45 WARNING KERN SPI driver mtd_dataflash has no spi_device_id for atmel,dataflash Fix this by adding the necessary spi_device_ids. Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Jon Hunter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211130112443.107730-1-jonathanh@nvidia.com --- drivers/mtd/devices/mtd_dataflash.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 9802e265fca8..2b317ed6c103 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -96,6 +96,13 @@ struct dataflash { struct mtd_info mtd; }; +static const struct spi_device_id dataflash_dev_ids[] = { + { "at45" }, + { "dataflash" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, dataflash_dev_ids); + #ifdef CONFIG_OF static const struct of_device_id dataflash_dt_ids[] = { { .compatible = "atmel,at45", }, @@ -927,6 +934,7 @@ static struct spi_driver dataflash_driver = { .name = "mtd_dataflash", .of_match_table = of_match_ptr(dataflash_dt_ids), }, + .id_table = dataflash_dev_ids, .probe = dataflash_probe, .remove = dataflash_remove, From a9418924552e52e63903cbb0310d7537260702bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 14:42:18 -0800 Subject: [PATCH 151/253] inet: use #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING consistently Since commit 4e1beecc3b58 ("net/sock: Add kernel config SOCK_RX_QUEUE_MAPPING"), sk_rx_queue_mapping access is guarded by CONFIG_SOCK_RX_QUEUE_MAPPING. Fixes: 54b92e841937 ("tcp: Migrate TCP_ESTABLISHED/TCP_SYN_RECV sockets in accept queues.") Signed-off-by: Eric Dumazet Cc: Kuniyuki Iwashima Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Tariq Toukan Acked-by: Kuniyuki Iwashima Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f7fea3a7c5e6..62a67fdc344c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -721,7 +721,7 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, sk_node_init(&nreq_sk->sk_node); nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping; #endif nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu; From 03cfda4fa6ea9bea2f30160579a78c2b8c1e616e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 15:37:24 -0800 Subject: [PATCH 152/253] tcp: fix another uninit-value (sk_rx_queue_mapping) KMSAN is still not happy [1]. I missed that passive connections do not inherit their sk_rx_queue_mapping values from the request socket, but instead tcp_child_process() is calling sk_mark_napi_id(child, skb) We have many sk_mark_napi_id() callers, so I am providing a new helper, forcing the setting sk_rx_queue_mapping and sk_napi_id. Note that we had no KMSAN report for sk_napi_id because passive connections got a copy of this field from the listener. sk_rx_queue_mapping in the other hand is inside the sk_dontcopy_begin/sk_dontcopy_end so sk_clone_lock() leaves this field uninitialized. We might remove dead code populating req->sk_rx_queue_mapping in the future. [1] BUG: KMSAN: uninit-value in __sk_rx_queue_set include/net/sock.h:1924 [inline] BUG: KMSAN: uninit-value in sk_rx_queue_update include/net/sock.h:1938 [inline] BUG: KMSAN: uninit-value in sk_mark_napi_id include/net/busy_poll.h:136 [inline] BUG: KMSAN: uninit-value in tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833 __sk_rx_queue_set include/net/sock.h:1924 [inline] sk_rx_queue_update include/net/sock.h:1938 [inline] sk_mark_napi_id include/net/busy_poll.h:136 [inline] tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833 tcp_v4_rcv+0x3d83/0x4ed0 net/ipv4/tcp_ipv4.c:2066 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 run_ksoftirqd+0x33/0x50 kernel/softirq.c:920 smpboot_thread_fn+0x616/0xbf0 kernel/smpboot.c:164 kthread+0x721/0x850 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 Uninit was created at: __alloc_pages+0xbc7/0x10a0 mm/page_alloc.c:5409 alloc_pages+0x8a5/0xb80 alloc_slab_page mm/slub.c:1810 [inline] allocate_slab+0x287/0x1c20 mm/slub.c:1947 new_slab mm/slub.c:2010 [inline] ___slab_alloc+0xbdf/0x1e90 mm/slub.c:3039 __slab_alloc mm/slub.c:3126 [inline] slab_alloc_node mm/slub.c:3217 [inline] slab_alloc mm/slub.c:3259 [inline] kmem_cache_alloc+0xbb3/0x11c0 mm/slub.c:3264 sk_prot_alloc+0xeb/0x570 net/core/sock.c:1914 sk_clone_lock+0xd6/0x1940 net/core/sock.c:2118 inet_csk_clone_lock+0x8d/0x6a0 net/ipv4/inet_connection_sock.c:956 tcp_create_openreq_child+0xb1/0x1ef0 net/ipv4/tcp_minisocks.c:453 tcp_v4_syn_recv_sock+0x268/0x2710 net/ipv4/tcp_ipv4.c:1563 tcp_check_req+0x207c/0x2a30 net/ipv4/tcp_minisocks.c:765 tcp_v4_rcv+0x36f5/0x4ed0 net/ipv4/tcp_ipv4.c:2047 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 Fixes: 342159ee394d ("net: avoid dirtying sk->sk_rx_queue_mapping") Fixes: a37a0ee4d25c ("net: avoid uninit-value from tcp_conn_request") Signed-off-by: Eric Dumazet Reported-by: syzbot Tested-by: Alexander Potapenko Signed-off-by: David S. Miller --- include/net/busy_poll.h | 13 +++++++++++++ net/ipv4/tcp_minisocks.c | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 7994455ec714..c4898fcbf923 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -136,6 +136,19 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) sk_rx_queue_update(sk, skb); } +/* Variant of sk_mark_napi_id() for passive flow setup, + * as sk->sk_napi_id and sk->sk_rx_queue_mapping content + * needs to be set. + */ +static inline void sk_mark_napi_id_set(struct sock *sk, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); +#endif + sk_rx_queue_set(sk, skb); +} + static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id) { #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index cf913a66df17..7c2d3ac2363a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -829,8 +829,8 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; - /* record NAPI ID of child */ - sk_mark_napi_id(child, skb); + /* record sk_napi_id and sk_rx_queue_mapping of child. */ + sk_mark_napi_id_set(child, skb); tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { From dac8e00fb640e9569cdeefd3ce8a75639e5d0711 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 18:27:18 -0800 Subject: [PATCH 153/253] bonding: make tx_rebalance_counter an atomic KCSAN reported a data-race [1] around tx_rebalance_counter which can be accessed from different contexts, without the protection of a lock/mutex. [1] BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0: bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613 bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949 do_set_master net/core/rtnetlink.c:2521 [inline] __rtnl_newlink net/core/rtnetlink.c:3475 [inline] rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2492 __do_sys_sendmsg net/socket.c:2501 [inline] __se_sys_sendmsg net/socket.c:2499 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1: bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 worker_thread+0x616/0xa70 kernel/workqueue.c:2445 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00000001 -> 0x00000064 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: bond1 bond_alb_monitor Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 14 ++++++++------ include/net/bond_alb.h | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 2ec8e015c7b3..533e476988f2 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1501,14 +1501,14 @@ void bond_alb_monitor(struct work_struct *work) struct slave *slave; if (!bond_has_slaves(bond)) { - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); bond_info->lp_counter = 0; goto re_arm; } rcu_read_lock(); - bond_info->tx_rebalance_counter++; + atomic_inc(&bond_info->tx_rebalance_counter); bond_info->lp_counter++; /* send learning packets */ @@ -1530,7 +1530,7 @@ void bond_alb_monitor(struct work_struct *work) } /* rebalance tx traffic */ - if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { + if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == rcu_access_pointer(bond->curr_active_slave)) { @@ -1540,7 +1540,7 @@ void bond_alb_monitor(struct work_struct *work) bond_info->unbalanced_load = 0; } } - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); } if (bond_info->rlb_enabled) { @@ -1610,7 +1610,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) tlb_init_slave(slave); /* order a rebalance ASAP */ - bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond->alb_info.tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) bond->alb_info.rlb_rebalance = 1; @@ -1647,7 +1648,8 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char rlb_clear_slave(bond, slave); } else if (link == BOND_LINK_UP) { /* order a rebalance ASAP */ - bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond_info->tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) { bond->alb_info.rlb_rebalance = 1; /* If the updelay module parameter is smaller than the diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index f6af76c87a6c..191c36afa1f4 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -126,7 +126,7 @@ struct tlb_slave_info { struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ u32 unbalanced_load; - int tx_rebalance_counter; + atomic_t tx_rebalance_counter; int lp_counter; /* -------- rlb parameters -------- */ int rlb_enabled; From 0f8a3b48f91b8dc1f3eff06b77a63a17183fccbd Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:32:13 +0800 Subject: [PATCH 154/253] selftests: net/fcnal-test.sh: add exit code Previously, the selftest framework always treats it as *ok* even though some of them are failed actually. That's because the script always returns 0. It supports PASS/FAIL/SKIP exit code now. CC: Philip Li Reported-by: kernel test robot Signed-off-by: Li Zhijian Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 7f5b265fcb90..a1da013d847b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4077,3 +4077,11 @@ cleanup 2>/dev/null printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + +if [ $nfail -ne 0 ]; then + exit 1 # KSFT_FAIL +elif [ $nsuccess -eq 0 ]; then + exit $ksft_skip +fi + +exit 0 # KSFT_PASS From 128f6ec95a282b2d8bc1041e59bf65810703fa44 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 3 Dec 2021 11:31:06 +0800 Subject: [PATCH 155/253] net: bcm4908: Handle dma_set_coherent_mask error codes The return value of dma_set_coherent_mask() is not always 0. To catch the exception in case that dma is not support the mask. Fixes: 9d61d138ab30 ("net: broadcom: rename BCM4908 driver & update DT binding") Signed-off-by: Jiasheng Jiang Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm4908_enet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 7cc5213c575a..b07cb9bc5f2d 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -708,7 +708,9 @@ static int bcm4908_enet_probe(struct platform_device *pdev) enet->irq_tx = platform_get_irq_byname(pdev, "tx"); - dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); + err = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); + if (err) + return err; err = bcm4908_enet_dma_alloc(enet); if (err) From badd7857f5c933a3dc34942a2c11d67fdbdc24de Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Dec 2021 13:11:28 +0300 Subject: [PATCH 156/253] net: altera: set a couple error code in probe() There are two error paths which accidentally return success instead of a negative error code. Fixes: bbd2190ce96d ("Altera TSE: Add main and header file for Altera Ethernet Driver") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/altera/altera_tse_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index d75d95a97dd9..993b2fb42961 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1430,16 +1430,19 @@ static int altera_tse_probe(struct platform_device *pdev) priv->rxdescmem_busaddr = dma_res->start; } else { + ret = -ENODEV; goto err_free_netdev; } - if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) + if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask)); - else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) + } else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(32)); - else + } else { + ret = -EIO; goto err_free_netdev; + } /* MAC address space */ ret = request_and_map(pdev, "control_port", &control_port, From 1ac5e21d43b2325854cf3b36b1509b28468dc6fd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Dec 2021 17:51:59 +0100 Subject: [PATCH 157/253] powercap: DTPM: Drop unused local variable from init_dtpm() The dtpm_descr variable in init_dtpm() is not used after commit f751db8adaea ("powercap/drivers/dtpm: Disable DTPM at boot time"), so drop it. Fixes: f751db8adaea ("powercap/drivers/dtpm: Disable DTPM at boot time") Reported-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki --- drivers/powercap/dtpm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index fb35c5828bfb..2a5c1829aab7 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -463,8 +463,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) static int __init init_dtpm(void) { - struct dtpm_descr *dtpm_descr; - pct = powercap_register_control_type(NULL, "dtpm", NULL); if (IS_ERR(pct)) { pr_err("Failed to register control type\n"); From 1d5379d0475419085d3575bd9155f2e558e96390 Mon Sep 17 00:00:00 2001 From: Michael Sterritt Date: Fri, 19 Nov 2021 15:27:57 -0800 Subject: [PATCH 158/253] x86/sev: Fix SEV-ES INS/OUTS instructions for word, dword, and qword Properly type the operands being passed to __put_user()/__get_user(). Otherwise, these routines truncate data for dependent instructions (e.g., INSW) and only read/write one byte. This has been tested by sending a string with REP OUTSW to a port and then reading it back in with REP INSW on the same port. Previous behavior was to only send and receive the first char of the size. For example, word operations for "abcd" would only read/write "ac". With change, the full string is now written and read back. Fixes: f980f9c31a923 (x86/sev-es: Compile early handler code into kernel image) Signed-off-by: Michael Sterritt Signed-off-by: Borislav Petkov Reviewed-by: Paolo Bonzini Reviewed-by: Marc Orr Reviewed-by: Peter Gonda Reviewed-by: Joerg Roedel Link: https://lkml.kernel.org/r/20211119232757.176201-1-sterritt@google.com --- arch/x86/kernel/sev.c | 57 +++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 74f0ec955384..a9fc2ac7a8bd 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -294,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, char *dst, char *buf, size_t size) { unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; - char __user *target = (char __user *)dst; - u64 d8; - u32 d4; - u16 d2; - u8 d1; /* * This function uses __put_user() independent of whether kernel or user @@ -320,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, * instructions here would cause infinite nesting. */ switch (size) { - case 1: + case 1: { + u8 d1; + u8 __user *target = (u8 __user *)dst; + memcpy(&d1, buf, 1); if (__put_user(d1, target)) goto fault; break; - case 2: + } + case 2: { + u16 d2; + u16 __user *target = (u16 __user *)dst; + memcpy(&d2, buf, 2); if (__put_user(d2, target)) goto fault; break; - case 4: + } + case 4: { + u32 d4; + u32 __user *target = (u32 __user *)dst; + memcpy(&d4, buf, 4); if (__put_user(d4, target)) goto fault; break; - case 8: + } + case 8: { + u64 d8; + u64 __user *target = (u64 __user *)dst; + memcpy(&d8, buf, 8); if (__put_user(d8, target)) goto fault; break; + } default: WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); return ES_UNSUPPORTED; @@ -362,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, char *src, char *buf, size_t size) { unsigned long error_code = X86_PF_PROT; - char __user *s = (char __user *)src; - u64 d8; - u32 d4; - u16 d2; - u8 d1; /* * This function uses __get_user() independent of whether kernel or user @@ -388,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, * instructions here would cause infinite nesting. */ switch (size) { - case 1: + case 1: { + u8 d1; + u8 __user *s = (u8 __user *)src; + if (__get_user(d1, s)) goto fault; memcpy(buf, &d1, 1); break; - case 2: + } + case 2: { + u16 d2; + u16 __user *s = (u16 __user *)src; + if (__get_user(d2, s)) goto fault; memcpy(buf, &d2, 2); break; - case 4: + } + case 4: { + u32 d4; + u32 __user *s = (u32 __user *)src; + if (__get_user(d4, s)) goto fault; memcpy(buf, &d4, 4); break; - case 8: + } + case 8: { + u64 d8; + u64 __user *s = (u64 __user *)src; if (__get_user(d8, s)) goto fault; memcpy(buf, &d8, 8); break; + } default: WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); return ES_UNSUPPORTED; From c07e45553da1808aa802e9f0ffa8108cfeaf7a17 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 26 Nov 2021 18:11:21 +0800 Subject: [PATCH 159/253] x86/entry: Add a fence for kernel entry SWAPGS in paranoid_entry() Commit 18ec54fdd6d18 ("x86/speculation: Prepare entry code for Spectre v1 swapgs mitigations") added FENCE_SWAPGS_{KERNEL|USER}_ENTRY for conditional SWAPGS. In paranoid_entry(), it uses only FENCE_SWAPGS_KERNEL_ENTRY for both branches. This is because the fence is required for both cases since the CR3 write is conditional even when PTI is enabled. But 96b2371413e8f ("x86/entry/64: Switch CR3 before SWAPGS in paranoid entry") changed the order of SWAPGS and the CR3 write. And it missed the needed FENCE_SWAPGS_KERNEL_ENTRY for the user gsbase case. Add it back by changing the branches so that FENCE_SWAPGS_KERNEL_ENTRY can cover both branches. [ bp: Massage, fix typos, remove obsolete comment while at it. ] Fixes: 96b2371413e8f ("x86/entry/64: Switch CR3 before SWAPGS in paranoid entry") Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211126101209.8613-2-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e38a4cf795d9..f1a8b5b2af96 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -890,6 +890,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ movl $1, %ebx + /* * The kernel-enforced convention is a negative GSBASE indicates * a kernel value. No SWAPGS needed on entry and exit. @@ -897,21 +898,14 @@ SYM_CODE_START_LOCAL(paranoid_entry) movl $MSR_GS_BASE, %ecx rdmsr testl %edx, %edx - jns .Lparanoid_entry_swapgs - ret - -.Lparanoid_entry_swapgs: - swapgs - - /* - * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an - * unconditional CR3 write, even in the PTI case. So do an lfence - * to prevent GS speculation, regardless of whether PTI is enabled. - */ - FENCE_SWAPGS_KERNEL_ENTRY + js .Lparanoid_kernel_gsbase /* EBX = 0 -> SWAPGS required on exit */ xorl %ebx, %ebx + swapgs +.Lparanoid_kernel_gsbase: + + FENCE_SWAPGS_KERNEL_ENTRY ret SYM_CODE_END(paranoid_entry) From 054aa8d439b9185d4f5eb9a90282d1ce74772969 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Dec 2021 10:06:14 -0800 Subject: [PATCH 160/253] fget: check that the fd still exists after getting a ref to it Jann Horn points out that there is another possible race wrt Unix domain socket garbage collection, somewhat reminiscent of the one fixed in commit cbcf01128d0a ("af_unix: fix garbage collect vs MSG_PEEK"). See the extended comment about the garbage collection requirements added to unix_peek_fds() by that commit for details. The race comes from how we can locklessly look up a file descriptor just as it is in the process of being closed, and with the right artificial timing (Jann added a few strategic 'mdelay(500)' calls to do that), the Unix domain socket garbage collector could see the reference count decrement of the close() happen before fget() took its reference to the file and the file was attached onto a new file descriptor. This is all (intentionally) correct on the 'struct file *' side, with RCU lookups and lockless reference counting very much part of the design. Getting that reference count out of order isn't a problem per se. But the garbage collector can get confused by seeing this situation of having seen a file not having any remaining external references and then seeing it being attached to an fd. In commit cbcf01128d0a ("af_unix: fix garbage collect vs MSG_PEEK") the fix was to serialize the file descriptor install with the garbage collector by taking and releasing the unix_gc_lock. That's not really an option here, but since this all happens when we are in the process of looking up a file descriptor, we can instead simply just re-check that the file hasn't been closed in the meantime, and just re-do the lookup if we raced with a concurrent close() of the same file descriptor. Reported-and-tested-by: Jann Horn Acked-by: Miklos Szeredi Signed-off-by: Linus Torvalds --- fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/file.c b/fs/file.c index 8627dacfc424..ad4a8bf3cf10 100644 --- a/fs/file.c +++ b/fs/file.c @@ -858,6 +858,10 @@ loop: file = NULL; else if (!get_file_rcu_many(file, refs)) goto loop; + else if (files_lookup_fd_raw(files, fd) != file) { + fput_many(file, refs); + goto loop; + } } rcu_read_unlock(); From 1367afaa2ee90d1c956dfc224e199fcb3ff3f8cc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 26 Nov 2021 18:11:22 +0800 Subject: [PATCH 161/253] x86/entry: Use the correct fence macro after swapgs in kernel CR3 The commit c75890700455 ("x86/entry/64: Remove unneeded kernel CR3 switching") removed a CR3 write in the faulting path of load_gs_index(). But the path's FENCE_SWAPGS_USER_ENTRY has no fence operation if PTI is enabled, see spectre_v1_select_mitigation(). Rather, it depended on the serializing CR3 write of SWITCH_TO_KERNEL_CR3 and since it got removed, add a FENCE_SWAPGS_KERNEL_ENTRY call to make sure speculation is blocked. [ bp: Massage commit message and comment. ] Fixes: c75890700455 ("x86/entry/64: Remove unneeded kernel CR3 switching") Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211126101209.8613-3-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f1a8b5b2af96..f9e1c06a1c32 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -987,11 +987,6 @@ SYM_CODE_START_LOCAL(error_entry) pushq %r12 ret -.Lerror_entry_done_lfence: - FENCE_SWAPGS_KERNEL_ENTRY -.Lerror_entry_done: - ret - /* * There are two places in the kernel that can potentially fault with * usergs. Handle them here. B stepping K8s sometimes report a @@ -1014,8 +1009,14 @@ SYM_CODE_START_LOCAL(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS - FENCE_SWAPGS_USER_ENTRY - jmp .Lerror_entry_done + + /* + * Issue an LFENCE to prevent GS speculation, regardless of whether it is a + * kernel or user gsbase. + */ +.Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY + ret .Lbstep_iret: /* Fix truncated RIP */ From 5c8f6a2e316efebb3ba93d8c1af258155dcf5632 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 26 Nov 2021 18:11:23 +0800 Subject: [PATCH 162/253] x86/xen: Add xenpv_restore_regs_and_return_to_usermode() In the native case, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is the trampoline stack. But XEN pv doesn't use trampoline stack, so PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is also the kernel stack. In that case, source and destination stacks are identical, which means that reusing swapgs_restore_regs_and_return_to_usermode() in XEN pv would cause %rsp to move up to the top of the kernel stack and leave the IRET frame below %rsp. This is dangerous as it can be corrupted if #NMI / #MC hit as either of these events occurring in the middle of the stack pushing would clobber data on the (original) stack. And, with XEN pv, swapgs_restore_regs_and_return_to_usermode() pushing the IRET frame on to the original address is useless and error-prone when there is any future attempt to modify the code. [ bp: Massage commit message. ] Fixes: 7f2590a110b8 ("x86/entry/64: Use a per-CPU trampoline stack for IDT entries") Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Boris Ostrovsky Link: https://lkml.kernel.org/r/20211126101209.8613-4-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 4 ++++ arch/x86/xen/xen-asm.S | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f9e1c06a1c32..97b1f84bb53f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -574,6 +574,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) ud2 1: #endif +#ifdef CONFIG_XEN_PV + ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV +#endif + POP_REGS pop_rdi=0 /* diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 220dd9678494..444d824775f6 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -20,6 +20,7 @@ #include #include +#include <../entry/calling.h> .pushsection .noinstr.text, "ax" /* @@ -192,6 +193,25 @@ SYM_CODE_START(xen_iret) jmp hypercall_iret SYM_CODE_END(xen_iret) +/* + * XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is + * also the kernel stack. Reusing swapgs_restore_regs_and_return_to_usermode() + * in XEN pv would cause %rsp to move up to the top of the kernel stack and + * leave the IRET frame below %rsp, which is dangerous to be corrupted if #NMI + * interrupts. And swapgs_restore_regs_and_return_to_usermode() pushing the IRET + * frame at the same address is useless. + */ +SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode) + UNWIND_HINT_REGS + POP_REGS + + /* stackleak_erase() can work safely on the kernel stack. */ + STACKLEAK_ERASE_NOCLOBBER + + addq $8, %rsp /* skip regs->orig_ax */ + jmp xen_iret +SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) + /* * Xen handles syscall callbacks much like ordinary exceptions, which * means we have: From 65de262a209da0951eb9bc60b3b7faf3bbffa38a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 2 Dec 2021 15:29:35 -0300 Subject: [PATCH 163/253] cifs: fix missed refcounting of ipc tcon Fix missed refcounting of IPC tcon used for getting domain-based DFS root referrals. We want to keep it alive as long as mount is active and can be refreshed. For standalone DFS root referrals it wouldn't be a problem as the client ends up having an IPC tcon for both mount and cache. Fixes: c88f7dcd6d64 ("cifs: support nested dfs links over reconnect") Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/cifs/connect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6b705026da1a..90f38fb9baa0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3426,6 +3426,7 @@ static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list */ mount_put_conns(mnt_ctx); mount_get_dfs_conns(mnt_ctx); + set_root_ses(mnt_ctx); full_path = build_unc_path_to_root(ctx, cifs_sb, true); if (IS_ERR(full_path)) From 5bf91ef03d987eb617dffccbb0bf38b2451bf37f Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 2 Dec 2021 07:14:42 +0000 Subject: [PATCH 164/253] cifs: wait for tcon resource_id before getting fscache super The logic for initializing tcon->resource_id is done inside cifs_root_iget. fscache super cookie relies on this for aux data. So we need to push the fscache initialization to this later point during mount. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 6 ------ fs/cifs/fscache.c | 2 +- fs/cifs/inode.c | 7 +++++++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 90f38fb9baa0..7cc469e4682a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3046,12 +3046,6 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) cifs_dbg(VFS, "read only mount of RW share\n"); /* no need to log a RW mount of a typical RW share */ } - /* - * The cookie is initialized from volume info returned above. - * Inside cifs_fscache_get_super_cookie it checks - * that we do not get super cookie twice. - */ - cifs_fscache_get_super_cookie(tcon); } /* diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 7e409a38a2d7..f4da693760c1 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -92,7 +92,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) * In the future, as we integrate with newer fscache features, * we may want to instead add a check if cookie has changed */ - if (tcon->fscache == NULL) + if (tcon->fscache) return; sharename = extract_sharename(tcon->treeName); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 82848412ad85..96d083db1737 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1376,6 +1376,13 @@ iget_no_retry: inode = ERR_PTR(rc); } + /* + * The cookie is initialized from volume info returned above. + * Inside cifs_fscache_get_super_cookie it checks + * that we do not get super cookie twice. + */ + cifs_fscache_get_super_cookie(tcon); + out: kfree(path); free_xid(xid); From 2adc82006bcb067523bedd38e93711c80fd274c1 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 2 Dec 2021 07:30:00 +0000 Subject: [PATCH 165/253] cifs: add server conn_id to fscache client cookie The fscache client cookie uses the server address (and port) as the cookie key. This is a problem when nosharesock is used. Two different connections will use duplicate cookies. Avoid this by adding server->conn_id to the key, so that it's guaranteed that cookie will not be duplicated. Also, for secondary channels of a session, copy the fscache pointer from the primary channel. The primary channel is guaranteed not to go away as long as secondary channels are in use. Also addresses minor problem found by kernel test robot. Reported-by: kernel test robot Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 4 ++++ fs/cifs/fscache.c | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7cc469e4682a..18448dbd762a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1562,6 +1562,10 @@ smbd_connected: /* fscache server cookies are based on primary channel only */ if (!CIFS_SERVER_IS_CHAN(tcp_ses)) cifs_fscache_get_client_cookie(tcp_ses); +#ifdef CONFIG_CIFS_FSCACHE + else + tcp_ses->fscache = tcp_ses->primary_server->fscache; +#endif /* CONFIG_CIFS_FSCACHE */ /* queue echo request delayed work */ queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval); diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index f4da693760c1..1db3437f3b7d 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -24,6 +24,7 @@ struct cifs_server_key { struct in_addr ipv4_addr; struct in6_addr ipv6_addr; }; + __u64 conn_id; } __packed; /* @@ -37,6 +38,14 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) struct cifs_server_key key; uint16_t key_len = sizeof(key.hdr); + /* + * Check if cookie was already initialized so don't reinitialize it. + * In the future, as we integrate with newer fscache features, + * we may want to instead add a check if cookie has changed + */ + if (server->fscache) + return; + memset(&key, 0, sizeof(key)); /* @@ -62,6 +71,7 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) server->fscache = NULL; return; } + key.conn_id = server->conn_id; server->fscache = fscache_acquire_cookie(cifs_fscache_netfs.primary_index, From bbb9db5e2a7a1ca0926d26a279000384be21b789 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 2 Dec 2021 07:46:54 +0000 Subject: [PATCH 166/253] cifs: avoid use of dstaddr as key for fscache client cookie server->dstaddr can change when the DNS mapping for the server hostname changes. But conn_id is a u64 counter that is incremented each time a new TCP connection is setup. So use only that as a key. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/fscache.c | 38 +------------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 1db3437f3b7d..003c5f1f4dfb 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -16,14 +16,6 @@ * Key layout of CIFS server cache index object */ struct cifs_server_key { - struct { - uint16_t family; /* address family */ - __be16 port; /* IP port */ - } hdr; - union { - struct in_addr ipv4_addr; - struct in6_addr ipv6_addr; - }; __u64 conn_id; } __packed; @@ -32,11 +24,7 @@ struct cifs_server_key { */ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) { - const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr; - const struct sockaddr_in *addr = (struct sockaddr_in *) sa; - const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa; struct cifs_server_key key; - uint16_t key_len = sizeof(key.hdr); /* * Check if cookie was already initialized so don't reinitialize it. @@ -47,36 +35,12 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) return; memset(&key, 0, sizeof(key)); - - /* - * Should not be a problem as sin_family/sin6_family overlays - * sa_family field - */ - key.hdr.family = sa->sa_family; - switch (sa->sa_family) { - case AF_INET: - key.hdr.port = addr->sin_port; - key.ipv4_addr = addr->sin_addr; - key_len += sizeof(key.ipv4_addr); - break; - - case AF_INET6: - key.hdr.port = addr6->sin6_port; - key.ipv6_addr = addr6->sin6_addr; - key_len += sizeof(key.ipv6_addr); - break; - - default: - cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family); - server->fscache = NULL; - return; - } key.conn_id = server->conn_id; server->fscache = fscache_acquire_cookie(cifs_fscache_netfs.primary_index, &cifs_fscache_server_index_def, - &key, key_len, + &key, sizeof(key), NULL, 0, server, 0, true); cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", From 8581fd402a0cf80b5298e3b225e7a7bd8f110e69 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Dec 2021 12:34:00 -0800 Subject: [PATCH 167/253] treewide: Add missing includes masked by cgroup -> bpf dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cgroup.h (therefore swap.h, therefore half of the universe) includes bpf.h which in turn includes module.h and slab.h. Since we're about to get rid of that dependency we need to clean things up. v2: drop the cpu.h include from cacheinfo.h, it's not necessary and it makes riscv sensitive to ordering of include files. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Reviewed-by: Christoph Hellwig Acked-by: Krzysztof Wilczyński Acked-by: Peter Chen Acked-by: SeongJae Park Acked-by: Jani Nikula Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/all/20211120035253.72074-1-kuba@kernel.org/ # v1 Link: https://lore.kernel.org/all/20211120165528.197359-1-kuba@kernel.org/ # cacheinfo discussion Link: https://lore.kernel.org/bpf/20211202203400.1208663-1-kuba@kernel.org --- block/fops.c | 1 + drivers/gpu/drm/drm_gem_shmem_helper.c | 1 + drivers/gpu/drm/i915/gt/intel_gtt.c | 1 + drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/lima/lima_device.c | 1 + drivers/gpu/drm/msm/msm_gem_shrinker.c | 1 + drivers/gpu/drm/ttm/ttm_tt.c | 1 + drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c | 2 ++ drivers/pci/controller/dwc/pci-exynos.c | 1 + drivers/pci/controller/dwc/pcie-qcom-ep.c | 1 + drivers/usb/cdns3/host.c | 1 + include/linux/cacheinfo.h | 1 - include/linux/device/driver.h | 1 + include/linux/filter.h | 2 +- mm/damon/vaddr.c | 1 + mm/memory_hotplug.c | 1 + mm/swap_slots.c | 1 + 18 files changed, 18 insertions(+), 2 deletions(-) diff --git a/block/fops.c b/block/fops.c index ad732a36f9b3..3cb1e81929bc 100644 --- a/block/fops.c +++ b/block/fops.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "blk.h" static inline struct inode *bdev_file_inode(struct file *file) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 7b9f69f21f1e..bca0de92802e 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 67d14afa6623..b67f620c3d93 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -6,6 +6,7 @@ #include /* fault-inject.h is not standalone! */ #include +#include #include "gem/i915_gem_lmem.h" #include "i915_trace.h" diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 820a1f38b271..89cccefeea63 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "gem/i915_gem_context.h" #include "gt/intel_breadcrumbs.h" diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c index 65fdca366e41..f74f8048af8f 100644 --- a/drivers/gpu/drm/lima/lima_device.c +++ b/drivers/gpu/drm/lima/lima_device.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 4a1420b05e97..086dacf2f26a 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -5,6 +5,7 @@ */ #include +#include #include "msm_drv.h" #include "msm_gem.h" diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 7e83c00a3f48..79c870a3bef8 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c index a78c398bf5b2..01e7d3c0b68e 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "hinic_hw_dev.h" #include "hinic_dev.h" diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c index 0ef68fdd1f26..61c20907315f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -5,6 +5,8 @@ * */ +#include + #include "otx2_common.h" #include "otx2_ptp.h" diff --git a/drivers/pci/controller/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c index c24dab383654..722dacdd5a17 100644 --- a/drivers/pci/controller/dwc/pci-exynos.c +++ b/drivers/pci/controller/dwc/pci-exynos.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "pcie-designware.h" diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index 7b17da2f9b3f..cfe66bf04c1d 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "pcie-designware.h" diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 84dadfa726aa..9643b905e2d8 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -10,6 +10,7 @@ */ #include +#include #include "core.h" #include "drd.h" #include "host-export.h" diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2f909ed084c6..4ff37cb763ae 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,7 +3,6 @@ #define _LINUX_CACHEINFO_H #include -#include #include #include diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index a498ebcf4993..15e7c5e15d62 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -18,6 +18,7 @@ #include #include #include +#include /** * enum probe_type - device driver probe type to try diff --git a/include/linux/filter.h b/include/linux/filter.h index 534f678ca50f..7f1e88e3e2b5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -6,6 +6,7 @@ #define __LINUX_FILTER_H__ #include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include -#include struct sk_buff; struct sock; diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 35fe49080ee9..47f47f60440e 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "prmtv-common.h" diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 852041f6be41..2a9627dc784c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -35,6 +35,7 @@ #include #include #include +#include #include diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 16f706c55d92..2b5531840583 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include From 2fa7d94afc1afbb4d702760c058dc2d7ed30f226 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 30 Nov 2021 20:16:07 +0200 Subject: [PATCH 168/253] bpf: Fix the off-by-two error in range markings The first commit cited below attempts to fix the off-by-one error that appeared in some comparisons with an open range. Due to this error, arithmetically equivalent pieces of code could get different verdicts from the verifier, for example (pseudocode): // 1. Passes the verifier: if (data + 8 > data_end) return early read *(u64 *)data, i.e. [data; data+7] // 2. Rejected by the verifier (should still pass): if (data + 7 >= data_end) return early read *(u64 *)data, i.e. [data; data+7] The attempted fix, however, shifts the range by one in a wrong direction, so the bug not only remains, but also such piece of code starts failing in the verifier: // 3. Rejected by the verifier, but the check is stricter than in #1. if (data + 8 >= data_end) return early read *(u64 *)data, i.e. [data; data+7] The change performed by that fix converted an off-by-one bug into off-by-two. The second commit cited below added the BPF selftests written to ensure than code chunks like #3 are rejected, however, they should be accepted. This commit fixes the off-by-two error by adjusting new_range in the right direction and fixes the tests by changing the range into the one that should actually fail. Fixes: fb2a311a31d3 ("bpf: fix off by one for range markings with L{T, E} patterns") Fixes: b37242c773b2 ("bpf: add test cases to bpf selftests to cover all access tests") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211130181607.593149-1-maximmi@nvidia.com --- kernel/bpf/verifier.c | 2 +- .../bpf/verifier/xdp_direct_packet_access.c | 32 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 50efda51515b..f3001937bbb9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8422,7 +8422,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, new_range = dst_reg->off; if (range_right_open) - new_range--; + new_range++; /* Examples for register markings: * diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c index bfb97383e6b5..de172a5b8754 100644 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -112,10 +112,10 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -167,10 +167,10 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -274,9 +274,9 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -437,9 +437,9 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -544,10 +544,10 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -599,10 +599,10 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -706,9 +706,9 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -869,9 +869,9 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, From 8e227b198a55859bf790dc7f4b1e30c0859c6756 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 3 Dec 2021 09:44:13 -0800 Subject: [PATCH 169/253] qede: validate non LSO skb length Although it is unlikely that stack could transmit a non LSO skb with length > MTU, however in some cases or environment such occurrences actually resulted into firmware asserts due to packet length being greater than the max supported by the device (~9700B). This patch adds the safeguard for such odd cases to avoid firmware asserts. v2: Added "Fixes" tag with one of the initial driver commit which enabled the TX traffic actually (as this was probably day1 issue which was discovered recently by some customer environment) Fixes: a2ec6172d29c ("qede: Add support for link") Signed-off-by: Manish Chopra Signed-off-by: Alok Prasad Signed-off-by: Prabhakar Kushwaha Signed-off-by: Ariel Elior Link: https://lore.kernel.org/r/20211203174413.13090-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_fp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 065e9004598e..999abcfe3310 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1643,6 +1643,13 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) data_split = true; } } else { + if (unlikely(skb->len > ETH_TX_MAX_NON_LSO_PKT_LEN)) { + DP_ERR(edev, "Unexpected non LSO skb length = 0x%x\n", skb->len); + qede_free_failed_tx_pkt(txq, first_bd, 0, false); + qede_update_tx_producer(txq); + return NETDEV_TX_OK; + } + val |= ((skb->len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT); } From 2be6d4d16a0849455a5c22490e3c5983495fed00 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 2 Dec 2021 14:34:37 +0000 Subject: [PATCH 170/253] net: cdc_ncm: Allow for dwNtbOutMaxSize to be unset or zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, due to the sequential use of min_t() and clamp_t() macros, in cdc_ncm_check_tx_max(), if dwNtbOutMaxSize is not set, the logic sets tx_max to 0. This is then used to allocate the data area of the SKB requested later in cdc_ncm_fill_tx_frame(). This does not cause an issue presently because when memory is allocated during initialisation phase of SKB creation, more memory (512b) is allocated than is required for the SKB headers alone (320b), leaving some space (512b - 320b = 192b) for CDC data (172b). However, if more elements (for example 3 x u64 = [24b]) were added to one of the SKB header structs, say 'struct skb_shared_info', increasing its original size (320b [320b aligned]) to something larger (344b [384b aligned]), then suddenly the CDC data (172b) no longer fits in the spare SKB data area (512b - 384b = 128b). Consequently the SKB bounds checking semantics fails and panics: skbuff: skb_over_panic: text:ffffffff830a5b5f len:184 put:172 \ head:ffff888119227c00 data:ffff888119227c00 tail:0xb8 end:0x80 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:110! RIP: 0010:skb_panic+0x14f/0x160 net/core/skbuff.c:106 Call Trace: skb_over_panic+0x2c/0x30 net/core/skbuff.c:115 skb_put+0x205/0x210 net/core/skbuff.c:1877 skb_put_zero include/linux/skbuff.h:2270 [inline] cdc_ncm_ndp16 drivers/net/usb/cdc_ncm.c:1116 [inline] cdc_ncm_fill_tx_frame+0x127f/0x3d50 drivers/net/usb/cdc_ncm.c:1293 cdc_ncm_tx_fixup+0x98/0xf0 drivers/net/usb/cdc_ncm.c:1514 By overriding the max value with the default CDC_NCM_NTB_MAX_SIZE_TX when not offered through the system provided params, we ensure enough data space is allocated to handle the CDC data, meaning no crash will occur. Cc: Oliver Neukum Fixes: 289507d3364f9 ("net: cdc_ncm: use sysfs for rx/tx aggregation tuning") Signed-off-by: Lee Jones Reviewed-by: Bjørn Mork Link: https://lore.kernel.org/r/20211202143437.1411410-1-lee.jones@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 24753a4da7e6..e303b522efb5 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -181,6 +181,8 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx) min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32); max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); + if (max == 0) + max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */ /* some devices set dwNtbOutMaxSize too low for the above default */ min = min(min, max); From 9ed20bafc85806ca6c97c9128cec46c3ef80ae86 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Fri, 3 Dec 2021 17:32:03 -0600 Subject: [PATCH 171/253] preempt/dynamic: Fix setup_preempt_mode() return value __setup() callbacks expect 1 for success and 0 for failure. Correct the usage here to reflect that. Fixes: 826bfeb37bb4 ("preempt/dynamic: Support dynamic preempt with preempt= boot option") Reported-by: Mark Rutland Signed-off-by: Andrew Halaney Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211203233203.133581-1-ahalaney@redhat.com --- kernel/sched/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 76f9deeaa942..814c52d90c0f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6617,11 +6617,11 @@ static int __init setup_preempt_mode(char *str) int mode = sched_dynamic_mode(str); if (mode < 0) { pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); - return 1; + return 0; } sched_dynamic_update(mode); - return 0; + return 1; } __setup("preempt=", setup_preempt_mode); From 315c4f884800c45cb6bd8c90422fad554a8b9588 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Thu, 2 Dec 2021 11:20:33 +0000 Subject: [PATCH 172/253] sched/uclamp: Fix rq->uclamp_max not set on first enqueue Commit d81ae8aac85c ("sched/uclamp: Fix initialization of struct uclamp_rq") introduced a bug where uclamp_max of the rq is not reset to match the woken up task's uclamp_max when the rq is idle. The code was relying on rq->uclamp_max initialized to zero, so on first enqueue static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, enum uclamp_id clamp_id) { ... if (uc_se->value > READ_ONCE(uc_rq->value)) WRITE_ONCE(uc_rq->value, uc_se->value); } was actually resetting it. But since commit d81ae8aac85c changed the default to 1024, this no longer works. And since rq->uclamp_flags is also initialized to 0, neither above code path nor uclamp_idle_reset() update the rq->uclamp_max on first wake up from idle. This is only visible from first wake up(s) until the first dequeue to idle after enabling the static key. And it only matters if the uclamp_max of this task is < 1024 since only then its uclamp_max will be effectively ignored. Fix it by properly initializing rq->uclamp_flags = UCLAMP_FLAG_IDLE to ensure uclamp_idle_reset() is called which then will update the rq uclamp_max value as expected. Fixes: d81ae8aac85c ("sched/uclamp: Fix initialization of struct uclamp_rq") Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Dietmar Eggemann Link: https://lkml.kernel.org/r/20211202112033.1705279-1-qais.yousef@arm.com --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 814c52d90c0f..77563109c0ea 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1918,7 +1918,7 @@ static void __init init_uclamp_rq(struct rq *rq) }; } - rq->uclamp_flags = 0; + rq->uclamp_flags = UCLAMP_FLAG_IDLE; } static void __init init_uclamp(void) From 0f9fee4cdebfbe695c297e5b603a275e2557c1cc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 4 Dec 2021 21:14:40 +0100 Subject: [PATCH 173/253] parisc: Fix "make install" on newer debian releases On newer debian releases the debian-provided "installkernel" script is installed in /usr/sbin. Fix the kernel install.sh script to look for the script in this directory as well. Signed-off-by: Helge Deller Cc: # v3.13+ --- arch/parisc/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index 056d588befdd..70d3cffb0251 100644 --- a/arch/parisc/install.sh +++ b/arch/parisc/install.sh @@ -39,6 +39,7 @@ verify "$3" if [ -n "${INSTALLKERNEL}" ]; then if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi fi # Default install From afdb4a5b1d340e4afffc65daa21cc71890d7d589 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 4 Dec 2021 21:21:46 +0100 Subject: [PATCH 174/253] parisc: Mark cr16 CPU clocksource unstable on all SMP machines In commit c8c3735997a3 ("parisc: Enhance detection of synchronous cr16 clocksources") I assumed that CPUs on the same physical core are syncronous. While booting up the kernel on two different C8000 machines, one with a dual-core PA8800 and one with a dual-core PA8900 CPU, this turned out to be wrong. The symptom was that I saw a jump in the internal clocks printed to the syslog and strange overall behaviour. On machines which have 4 cores (2 dual-cores) the problem isn't visible, because the current logic already marked the cr16 clocksource unstable in this case. This patch now marks the cr16 interval timers unstable if we have more than one CPU in the system, and it fixes this issue. Fixes: c8c3735997a3 ("parisc: Enhance detection of synchronous cr16 clocksources") Signed-off-by: Helge Deller Cc: # v5.15+ --- arch/parisc/kernel/time.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 9fb1e794831b..061119a56fbe 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -249,30 +249,16 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs on - * different sockets, so mark them unstable and lower rating on - * multi-socket SMP systems. + * The cr16 interval timers are not syncronized across CPUs, even if + * they share the same socket. */ if (num_online_cpus() > 1 && !running_on_qemu) { - int cpu; - unsigned long cpu0_loc; - cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; + /* mark sched_clock unstable */ + clear_sched_clock_stable(); - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - if ((cpu0_loc != 0) && - (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) - continue; - - /* mark sched_clock unstable */ - clear_sched_clock_stable(); - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; - break; - } + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; } /* register at clocksource framework */ From 75236f5f2299b502e4b9b267c1ce3bc14a222ceb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Nov 2021 22:23:49 +0000 Subject: [PATCH 175/253] KVM: SEV: Return appropriate error codes if SEV-ES scratch setup fails Return appropriate error codes if setting up the GHCB scratch area for an SEV-ES guest fails. In particular, returning -EINVAL instead of -ENOMEM when allocating the kernel buffer could be confusing as userspace would likely suspect a guest issue. Fixes: 8f423a80d299 ("KVM: SVM: Support MMIO for an SEV-ES guest") Cc: Tom Lendacky Signed-off-by: Sean Christopherson Message-Id: <20211109222350.2266045-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 59727a966f90..f35f59bfdb41 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2530,7 +2530,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) } #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) -static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) { struct vmcb_control_area *control = &svm->vmcb->control; struct ghcb *ghcb = svm->sev_es.ghcb; @@ -2541,14 +2541,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); if (!scratch_gpa_beg) { pr_err("vmgexit: scratch gpa not provided\n"); - return false; + return -EINVAL; } scratch_gpa_end = scratch_gpa_beg + len; if (scratch_gpa_end < scratch_gpa_beg) { pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", len, scratch_gpa_beg); - return false; + return -EINVAL; } if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { @@ -2566,7 +2566,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_gpa_end > ghcb_scratch_end) { pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", scratch_gpa_beg, scratch_gpa_end); - return false; + return -EINVAL; } scratch_va = (void *)svm->sev_es.ghcb; @@ -2579,18 +2579,18 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) if (len > GHCB_SCRATCH_AREA_LIMIT) { pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", len, GHCB_SCRATCH_AREA_LIMIT); - return false; + return -EINVAL; } scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT); if (!scratch_va) - return false; + return -ENOMEM; if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); kfree(scratch_va); - return false; + return -EFAULT; } /* @@ -2606,7 +2606,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) svm->sev_es.ghcb_sa = scratch_va; svm->sev_es.ghcb_sa_len = len; - return true; + return 0; } static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, @@ -2745,10 +2745,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ghcb_set_sw_exit_info_1(ghcb, 0); ghcb_set_sw_exit_info_2(ghcb, 0); - ret = -EINVAL; switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: - if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); + if (ret) break; ret = kvm_sev_es_mmio_read(vcpu, @@ -2757,7 +2757,8 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_MMIO_WRITE: - if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, false, control->exit_info_2); + if (ret) break; ret = kvm_sev_es_mmio_write(vcpu, @@ -2800,6 +2801,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", control->exit_info_1, control->exit_info_2); + ret = -EINVAL; break; default: ret = svm_invoke_exit_handler(vcpu, exit_code); @@ -2812,6 +2814,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { int count; int bytes; + int r; if (svm->vmcb->control.exit_info_2 > INT_MAX) return -EINVAL; @@ -2820,8 +2823,9 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) if (unlikely(check_mul_overflow(count, size, &bytes))) return -EINVAL; - if (!setup_vmgexit_scratch(svm, in, bytes)) - return -EINVAL; + r = setup_vmgexit_scratch(svm, in, bytes); + if (r) + return r; return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, count, in); From a655276a594978a4887520c1241cf6ac49d6230b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Nov 2021 22:23:50 +0000 Subject: [PATCH 176/253] KVM: SEV: Fall back to vmalloc for SEV-ES scratch area if necessary Use kvzalloc() to allocate KVM's buffer for SEV-ES's GHCB scratch area so that KVM falls back to __vmalloc() if physically contiguous memory isn't available. The buffer is purely a KVM software construct, i.e. there's no need for it to be physically contiguous. Cc: Tom Lendacky Signed-off-by: Sean Christopherson Message-Id: <20211109222350.2266045-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index f35f59bfdb41..94bde57df72e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2260,7 +2260,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->sev_es.vmsa)); if (svm->sev_es.ghcb_sa_free) - kfree(svm->sev_es.ghcb_sa); + kvfree(svm->sev_es.ghcb_sa); } static void dump_ghcb(struct vcpu_svm *svm) @@ -2493,7 +2493,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) svm->sev_es.ghcb_sa_sync = false; } - kfree(svm->sev_es.ghcb_sa); + kvfree(svm->sev_es.ghcb_sa); svm->sev_es.ghcb_sa = NULL; svm->sev_es.ghcb_sa_free = false; } @@ -2581,7 +2581,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) len, GHCB_SCRATCH_AREA_LIMIT); return -EINVAL; } - scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT); + scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); if (!scratch_va) return -ENOMEM; @@ -2589,7 +2589,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); - kfree(scratch_va); + kvfree(scratch_va); return -EFAULT; } From ad5b353240c8837109d1bcc6c3a9a501d7f6a960 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 2 Dec 2021 12:52:05 -0600 Subject: [PATCH 177/253] KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure Currently, an SEV-ES guest is terminated if the validation of the VMGEXIT exit code or exit parameters fails. The VMGEXIT instruction can be issued from userspace, even though userspace (likely) can't update the GHCB. To prevent userspace from being able to kill the guest, return an error through the GHCB when validation fails rather than terminating the guest. For cases where the GHCB can't be updated (e.g. the GHCB can't be mapped, etc.), just return back to the guest. The new error codes are documented in the lasest update to the GHCB specification. Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/sev-common.h | 11 ++++ arch/x86/kvm/svm/sev.c | 106 +++++++++++++++++------------- 2 files changed, 71 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 2cef6c5a52c2..6acaf5af0a3d 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -73,4 +73,15 @@ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) +/* + * Error codes related to GHCB input that can be communicated back to the guest + * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2. + */ +#define GHCB_ERR_NOT_REGISTERED 1 +#define GHCB_ERR_INVALID_USAGE 2 +#define GHCB_ERR_INVALID_SCRATCH_AREA 3 +#define GHCB_ERR_MISSING_INPUT 4 +#define GHCB_ERR_INVALID_INPUT 5 +#define GHCB_ERR_INVALID_EVENT 6 + #endif diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 94bde57df72e..7656a2c5662a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2352,24 +2352,29 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static int sev_es_validate_vmgexit(struct vcpu_svm *svm) +static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu; struct ghcb *ghcb; - u64 exit_code = 0; + u64 exit_code; + u64 reason; ghcb = svm->sev_es.ghcb; - /* Only GHCB Usage code 0 is supported */ - if (ghcb->ghcb_usage) - goto vmgexit_err; - /* - * Retrieve the exit code now even though is may not be marked valid + * Retrieve the exit code now even though it may not be marked valid * as it could help with debugging. */ exit_code = ghcb_get_sw_exit_code(ghcb); + /* Only GHCB Usage code 0 is supported */ + if (ghcb->ghcb_usage) { + reason = GHCB_ERR_INVALID_USAGE; + goto vmgexit_err; + } + + reason = GHCB_ERR_MISSING_INPUT; + if (!ghcb_sw_exit_code_is_valid(ghcb) || !ghcb_sw_exit_info_1_is_valid(ghcb) || !ghcb_sw_exit_info_2_is_valid(ghcb)) @@ -2448,30 +2453,34 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; default: + reason = GHCB_ERR_INVALID_EVENT; goto vmgexit_err; } - return 0; + return true; vmgexit_err: vcpu = &svm->vcpu; - if (ghcb->ghcb_usage) { + if (reason == GHCB_ERR_INVALID_USAGE) { vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", ghcb->ghcb_usage); + } else if (reason == GHCB_ERR_INVALID_EVENT) { + vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n", + exit_code); } else { - vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n", + vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n", exit_code); dump_ghcb(svm); } - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 2; - vcpu->run->internal.data[0] = exit_code; - vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + /* Clear the valid entries fields */ + memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); - return -EINVAL; + ghcb_set_sw_exit_info_1(ghcb, 2); + ghcb_set_sw_exit_info_2(ghcb, reason); + + return false; } void sev_es_unmap_ghcb(struct vcpu_svm *svm) @@ -2530,7 +2539,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) } #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) -static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) { struct vmcb_control_area *control = &svm->vmcb->control; struct ghcb *ghcb = svm->sev_es.ghcb; @@ -2541,14 +2550,14 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); if (!scratch_gpa_beg) { pr_err("vmgexit: scratch gpa not provided\n"); - return -EINVAL; + goto e_scratch; } scratch_gpa_end = scratch_gpa_beg + len; if (scratch_gpa_end < scratch_gpa_beg) { pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", len, scratch_gpa_beg); - return -EINVAL; + goto e_scratch; } if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { @@ -2566,7 +2575,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_gpa_end > ghcb_scratch_end) { pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", scratch_gpa_beg, scratch_gpa_end); - return -EINVAL; + goto e_scratch; } scratch_va = (void *)svm->sev_es.ghcb; @@ -2579,18 +2588,18 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) if (len > GHCB_SCRATCH_AREA_LIMIT) { pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", len, GHCB_SCRATCH_AREA_LIMIT); - return -EINVAL; + goto e_scratch; } scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); if (!scratch_va) - return -ENOMEM; + goto e_scratch; if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); kvfree(scratch_va); - return -EFAULT; + goto e_scratch; } /* @@ -2606,7 +2615,13 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) svm->sev_es.ghcb_sa = scratch_va; svm->sev_es.ghcb_sa_len = len; - return 0; + return true; + +e_scratch: + ghcb_set_sw_exit_info_1(ghcb, 2); + ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); + + return false; } static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, @@ -2657,7 +2672,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID); if (!ret) { - ret = -EINVAL; + /* Error, keep GHCB MSR value as-is */ break; } @@ -2693,10 +2708,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_MSR_TERM_REASON_POS); pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", reason_set, reason_code); - fallthrough; + + ret = -EINVAL; + break; } default: - ret = -EINVAL; + /* Error, keep GHCB MSR value as-is */ + break; } trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, @@ -2720,14 +2738,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) if (!ghcb_gpa) { vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n"); - return -EINVAL; + + /* Without a GHCB, just return right back to the guest */ + return 1; } if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) { /* Unable to map GHCB from guest */ vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", ghcb_gpa); - return -EINVAL; + + /* Without a GHCB, just return right back to the guest */ + return 1; } svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; @@ -2737,18 +2759,17 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) exit_code = ghcb_get_sw_exit_code(ghcb); - ret = sev_es_validate_vmgexit(svm); - if (ret) - return ret; + if (!sev_es_validate_vmgexit(svm)) + return 1; sev_es_sync_from_ghcb(svm); ghcb_set_sw_exit_info_1(ghcb, 0); ghcb_set_sw_exit_info_2(ghcb, 0); + ret = 1; switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: - ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); - if (ret) + if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) break; ret = kvm_sev_es_mmio_read(vcpu, @@ -2757,8 +2778,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_MMIO_WRITE: - ret = setup_vmgexit_scratch(svm, false, control->exit_info_2); - if (ret) + if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) break; ret = kvm_sev_es_mmio_write(vcpu, @@ -2787,14 +2807,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) default: pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", control->exit_info_1); - ghcb_set_sw_exit_info_1(ghcb, 1); - ghcb_set_sw_exit_info_2(ghcb, - X86_TRAP_UD | - SVM_EVTINJ_TYPE_EXEPT | - SVM_EVTINJ_VALID); + ghcb_set_sw_exit_info_1(ghcb, 2); + ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT); } - ret = 1; break; } case SVM_VMGEXIT_UNSUPPORTED_EVENT: @@ -2814,7 +2830,6 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { int count; int bytes; - int r; if (svm->vmcb->control.exit_info_2 > INT_MAX) return -EINVAL; @@ -2823,9 +2838,8 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) if (unlikely(check_mul_overflow(count, size, &bytes))) return -EINVAL; - r = setup_vmgexit_scratch(svm, in, bytes); - if (r) - return r; + if (!setup_vmgexit_scratch(svm, in, bytes)) + return 1; return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, count, in); From 1ff2fc02862d52e18fd3daabcfe840ec27e920a8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 20 Oct 2021 13:02:11 -0500 Subject: [PATCH 178/253] x86/sme: Explicitly map new EFI memmap table as encrypted Reserving memory using efi_mem_reserve() calls into the x86 efi_arch_mem_reserve() function. This function will insert a new EFI memory descriptor into the EFI memory map representing the area of memory to be reserved and marking it as EFI runtime memory. As part of adding this new entry, a new EFI memory map is allocated and mapped. The mapping is where a problem can occur. This new memory map is mapped using early_memremap() and generally mapped encrypted, unless the new memory for the mapping happens to come from an area of memory that is marked as EFI_BOOT_SERVICES_DATA memory. In this case, the new memory will be mapped unencrypted. However, during replacement of the old memory map, efi_mem_type() is disabled, so the new memory map will now be long-term mapped encrypted (in efi.memmap), resulting in the map containing invalid data and causing the kernel boot to crash. Since it is known that the area will be mapped encrypted going forward, explicitly map the new memory map as encrypted using early_memremap_prot(). Cc: # 4.14.x Fixes: 8f716c9b5feb ("x86/mm: Add support to access boot related data in the clear") Link: https://lore.kernel.org/all/ebf1eb2940405438a09d51d121ec0d02c8755558.1634752931.git.thomas.lendacky@amd.com/ Signed-off-by: Tom Lendacky [ardb: incorporate Kconfig fix by Arnd] Signed-off-by: Ard Biesheuvel --- arch/x86/Kconfig | 1 + arch/x86/platform/efi/quirks.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 95dd1ee01546..9636a3122496 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1932,6 +1932,7 @@ config EFI depends on ACPI select UCS2_STRING select EFI_RUNTIME_WRAPPERS + select ARCH_USE_MEMREMAP_PROT help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index b15ebfe40a73..b0b848d6933a 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -277,7 +277,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - new = early_memremap(data.phys_map, data.size); + new = early_memremap_prot(data.phys_map, data.size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; From 0fcfb00b28c0b7884635dacf38e46d60bf3d4eb1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Dec 2021 14:08:22 -0800 Subject: [PATCH 179/253] Linux 5.16-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0a6ecc8bb2d2..8e35d7804fef 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Gobble Gobble # *DOCUMENTATION* From 1a1aa356ddf3f16539f5962c01c5f702686dfc15 Mon Sep 17 00:00:00 2001 From: Michal Maloszewski Date: Tue, 26 Oct 2021 12:59:09 +0000 Subject: [PATCH 180/253] iavf: Fix reporting when setting descriptor count iavf_set_ringparams doesn't communicate to the user that 1. The user requested descriptor count is out of range. Instead it just quietly sets descriptors to the "clamped" value and calls it done. This makes it look an invalid value was successfully set as the descriptor count when this isn't actually true. 2. The user provided descriptor count needs to be inflated for alignment reasons. This behavior is confusing. The ice driver has already addressed this by rejecting invalid values for descriptor count and messaging for alignment adjustments. Do the same thing here by adding the error and info messages. Fixes: fbb7ddfef253 ("i40evf: core ethtool functionality") Signed-off-by: Anirudh Venkataramanan Signed-off-by: Michal Maloszewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/iavf/iavf_ethtool.c | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 0cecaff38d04..461f5237a2f8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -615,23 +615,44 @@ static int iavf_set_ringparam(struct net_device *netdev, if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; - new_tx_count = clamp_t(u32, ring->tx_pending, - IAVF_MIN_TXD, - IAVF_MAX_TXD); - new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (ring->tx_pending > IAVF_MAX_TXD || + ring->tx_pending < IAVF_MIN_TXD || + ring->rx_pending > IAVF_MAX_RXD || + ring->rx_pending < IAVF_MIN_RXD) { + netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n", + ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD, + IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE); + return -EINVAL; + } - new_rx_count = clamp_t(u32, ring->rx_pending, - IAVF_MIN_RXD, - IAVF_MAX_RXD); - new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_tx_count != ring->tx_pending) + netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", + new_tx_count); + + new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_rx_count != ring->rx_pending) + netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", + new_rx_count); /* if nothing to do return success */ if ((new_tx_count == adapter->tx_desc_count) && - (new_rx_count == adapter->rx_desc_count)) + (new_rx_count == adapter->rx_desc_count)) { + netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); return 0; + } - adapter->tx_desc_count = new_tx_count; - adapter->rx_desc_count = new_rx_count; + if (new_tx_count != adapter->tx_desc_count) { + netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n", + adapter->tx_desc_count, new_tx_count); + adapter->tx_desc_count = new_tx_count; + } + + if (new_rx_count != adapter->rx_desc_count) { + netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n", + adapter->rx_desc_count, new_rx_count); + adapter->rx_desc_count = new_rx_count; + } if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; From 61125b8be85dfbc7e9c7fe1cc6c6d631ab603516 Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Fri, 14 May 2021 11:43:13 +0200 Subject: [PATCH 181/253] i40e: Fix failed opcode appearing if handling messages from VF Fix failed operation code appearing if handling messages from VF. Implemented by waiting for VF appropriate state if request starts handle while VF reset. Without this patch the message handling request while VF is in a reset state ends with error -5 (I40E_ERR_PARAM). Fixes: 5c3c48ac6bf5 ("i40e: implement virtual device interface") Signed-off-by: Grzegorz Szczurek Signed-off-by: Karen Sornek Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 70 +++++++++++++------ .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 2 + 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 80ae264c99ba..f651861442c2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1948,6 +1948,32 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf, return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0); } +/** + * i40e_sync_vf_state + * @vf: pointer to the VF info + * @state: VF state + * + * Called from a VF message to synchronize the service with a potential + * VF reset state + **/ +static bool i40e_sync_vf_state(struct i40e_vf *vf, enum i40e_vf_states state) +{ + int i; + + /* When handling some messages, it needs VF state to be set. + * It is possible that this flag is cleared during VF reset, + * so there is a need to wait until the end of the reset to + * handle the request message correctly. + */ + for (i = 0; i < I40E_VF_STATE_WAIT_COUNT; i++) { + if (test_bit(state, &vf->vf_states)) + return true; + usleep_range(10000, 20000); + } + + return test_bit(state, &vf->vf_states); +} + /** * i40e_vc_get_version_msg * @vf: pointer to the VF info @@ -2008,7 +2034,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) size_t len = 0; int ret; - if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -2131,7 +2157,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg) bool allmulti = false; bool alluni = false; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -2219,7 +2245,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_vsi *vsi; u16 num_qps_all = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2368,7 +2394,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2540,7 +2566,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2590,7 +2616,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) u8 cur_pairs = vf->num_queue_pairs; struct i40e_pf *pf = vf->pf; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) return -EINVAL; if (req_pairs > I40E_MAX_VF_QUEUES) { @@ -2635,7 +2661,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) memset(&stats, 0, sizeof(struct i40e_eth_stats)); - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2752,7 +2778,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2824,7 +2850,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2968,7 +2994,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -3088,9 +3114,9 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) struct i40e_vsi *vsi = NULL; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) || - (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { + vrk->key_len != I40E_HKEY_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3119,9 +3145,9 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u16 i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) || - (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { + vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3154,7 +3180,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int len = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3190,7 +3216,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg) struct i40e_hw *hw = &pf->hw; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3215,7 +3241,7 @@ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3241,7 +3267,7 @@ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3468,7 +3494,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3599,7 +3625,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -3708,7 +3734,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u64 speed = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3824,7 +3850,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 091e32c1bb46..49575a640a84 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -18,6 +18,8 @@ #define I40E_MAX_VF_PROMISC_FLAGS 3 +#define I40E_VF_STATE_WAIT_COUNT 20 + /* Various queue ctrls */ enum i40e_queue_ctrl { I40E_QUEUE_CTRL_UNKNOWN = 0, From 8aa55ab422d9d0d825ebfb877702ed661e96e682 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Fri, 16 Jul 2021 11:33:56 +0200 Subject: [PATCH 182/253] i40e: Fix pre-set max number of queues for VF After setting pre-set combined to 16 queues and reserving 16 queues by tc qdisc, pre-set maximum combined queues returned to default value after VF reset being 4 and this generated errors during removing tc. Fixed by removing clear num_req_queues before reset VF. Fixes: e284fc280473 (i40e: Add and delete cloud filter) Signed-off-by: Grzegorz Szczurek Signed-off-by: Mateusz Palczewski Tested-by: Bindushree P Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index f651861442c2..2ea4deb8fc44 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3823,11 +3823,6 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) /* set this flag only after making sure all inputs are sane */ vf->adq_enabled = true; - /* num_req_queues is set when user changes number of queues via ethtool - * and this causes issue for default VSI(which depends on this variable) - * when ADq is enabled, hence reset it. - */ - vf->num_req_queues = 0; /* reset the VF in order to allocate resources */ i40e_vc_reset_vf(vf, true); From 23ec111bf3549aae37140330c31a16abfc172421 Mon Sep 17 00:00:00 2001 From: Norbert Zulinski Date: Mon, 22 Nov 2021 12:29:05 +0100 Subject: [PATCH 183/253] i40e: Fix NULL pointer dereference in i40e_dbg_dump_desc When trying to dump VFs VSI RX/TX descriptors using debugfs there was a crash due to NULL pointer dereference in i40e_dbg_dump_desc. Added a check to i40e_dbg_dump_desc that checks if VSI type is correct for dumping RX/TX descriptors. Fixes: 02e9c290814c ("i40e: debugfs interface") Signed-off-by: Sylwester Dziedziuch Signed-off-by: Norbert Zulinski Signed-off-by: Mateusz Palczewski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 291e61ac3e44..2c1b1da1220e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -553,6 +553,14 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, dev_info(&pf->pdev->dev, "vsi %d not found\n", vsi_seid); return; } + if (vsi->type != I40E_VSI_MAIN && + vsi->type != I40E_VSI_FDIR && + vsi->type != I40E_VSI_VMDQ2) { + dev_info(&pf->pdev->dev, + "vsi %d type %d descriptor rings not available\n", + vsi_seid, vsi->type); + return; + } if (type == RING_TYPE_XDP && !i40e_enabled_xdp_vsi(vsi)) { dev_info(&pf->pdev->dev, "XDP not enabled on VSI %d\n", vsi_seid); return; From 7d0c009043f6a970f62dbf5aecda9f8c3ccafcff Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 3 Dec 2021 14:28:10 -0700 Subject: [PATCH 184/253] platform/x86/intel: hid: add quirk to support Surface Go 3 Similar to other systems Surface Go 3 requires a DMI quirk to enable 5 button array for power and volume buttons. Buglink: https://github.com/linux-surface/linux-surface/issues/595 Cc: stable@vger.kernel.org Signed-off-by: Alex Hung Link: https://lore.kernel.org/r/20211203212810.2666508-1-alex.hung@canonical.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/hid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 08598942a6d7..13f8cf70b9ae 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -99,6 +99,13 @@ static const struct dmi_system_id button_array_table[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Tablet Gen 2"), }, }, + { + .ident = "Microsoft Surface Go 3", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), + }, + }, { } }; From cd8c917a56f20f48748dd43d9ae3caff51d5b987 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Mon, 6 Dec 2021 21:42:01 +0100 Subject: [PATCH 185/253] Makefile: Do not quote value for CONFIG_CC_IMPLICIT_FALLTHROUGH Andreas reported that a specific build environment for an external module, being a bit broken, does pass CC_IMPLICIT_FALLTHROUGH quoted as argument to gcc, causing an error gcc-11: error: "-Wimplicit-fallthrough=5": linker input file not found: No such file or directory Until this is more generally fixed as outlined in [1], by fixing scripts/link-vmlinux.sh, scripts/gen_autoksyms.sh, etc to not directly include the include/config/auto.conf, and in a second step, change Kconfig to generate the auto.conf without "", workaround the issue by explicitly unquoting CC_IMPLICIT_FALLTHROUGH. Reported-by: Andreas Beckmann Link: https://bugs.debian.org/1001083 Link: https://lore.kernel.org/linux-kbuild/CAK7LNAR-VXwHFEJqCcrFDZj+_4+Xd6oynbj_0eS8N504_ydmyw@mail.gmail.com/ [1] Signed-off-by: Salvatore Bonaccorso Reviewed-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8e35d7804fef..ef967a26bcd3 100644 --- a/Makefile +++ b/Makefile @@ -789,7 +789,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror -KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) +KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH:"%"=%) ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += -Qunused-arguments From dde91ccfa25fd58f64c397d91b81a4b393100ffa Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 3 Dec 2021 11:13:18 +0100 Subject: [PATCH 186/253] ethtool: do not perform operations on net devices being unregistered There is a short period between a net device starts to be unregistered and when it is actually gone. In that time frame ethtool operations could still be performed, which might end up in unwanted or undefined behaviours[1]. Do not allow ethtool operations after a net device starts its unregistration. This patch targets the netlink part as the ioctl one isn't affected: the reference to the net device is taken and the operation is executed within an rtnl lock section and the net device won't be found after unregister. [1] For example adding Tx queues after unregister ends up in NULL pointer exceptions and UaFs, such as: BUG: KASAN: use-after-free in kobject_get+0x14/0x90 Read of size 1 at addr ffff88801961248c by task ethtool/755 CPU: 0 PID: 755 Comm: ethtool Not tainted 5.15.0-rc6+ #778 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-4.fc34 04/014 Call Trace: dump_stack_lvl+0x57/0x72 print_address_description.constprop.0+0x1f/0x140 kasan_report.cold+0x7f/0x11b kobject_get+0x14/0x90 kobject_add_internal+0x3d1/0x450 kobject_init_and_add+0xba/0xf0 netdev_queue_update_kobjects+0xcf/0x200 netif_set_real_num_tx_queues+0xb4/0x310 veth_set_channels+0x1c3/0x550 ethnl_set_channels+0x524/0x610 Fixes: 041b1c5d4a53 ("ethtool: helper functions for netlink interface") Suggested-by: Jakub Kicinski Signed-off-by: Antoine Tenart Link: https://lore.kernel.org/r/20211203101318.435618-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 38b44c0291b1..96f4180aabd2 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -40,7 +40,8 @@ int ethnl_ops_begin(struct net_device *dev) if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); - if (!netif_device_present(dev)) { + if (!netif_device_present(dev) || + dev->reg_state == NETREG_UNREGISTERING) { ret = -ENODEV; goto err; } From 4dbb0dad8e63fcd0b5a117c2861d2abe7ff5f186 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Dec 2021 11:28:22 -0800 Subject: [PATCH 187/253] devlink: fix netns refcount leak in devlink_nl_cmd_reload() While preparing my patch series adding netns refcount tracking, I spotted bugs in devlink_nl_cmd_reload() Some error paths forgot to release a refcount on a netns. To fix this, we can reduce the scope of get_net()/put_net() section around the call to devlink_reload(). Fixes: ccdf07219da6 ("devlink: Add reload action option to devlink reload command") Fixes: dc64cc7c6310 ("devlink: Add devlink reload limit option") Signed-off-by: Eric Dumazet Cc: Moshe Shemesh Cc: Jacob Keller Cc: Jiri Pirko Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20211205192822.1741045-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 5ad72dbfcd07..c06c9ba6e8c5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4110,14 +4110,6 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return err; } - if (info->attrs[DEVLINK_ATTR_NETNS_PID] || - info->attrs[DEVLINK_ATTR_NETNS_FD] || - info->attrs[DEVLINK_ATTR_NETNS_ID]) { - dest_net = devlink_netns_get(skb, info); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - } - if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION]) action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]); else @@ -4160,6 +4152,14 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } + if (info->attrs[DEVLINK_ATTR_NETNS_PID] || + info->attrs[DEVLINK_ATTR_NETNS_FD] || + info->attrs[DEVLINK_ATTR_NETNS_ID]) { + dest_net = devlink_netns_get(skb, info); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + } + err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); if (dest_net) From 3f8d6577163f9903d4af5e5c0f90eb42944a8851 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 22 Nov 2021 10:11:05 -0300 Subject: [PATCH 188/253] Revert "perf bench: Fix two memory leaks detected with ASan" This: This reverts commit 92723ea0f11d92496687db8c9725248e9d1e5e1d. # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRRRRRRR Ok # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRR Ok # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRRRR Ok yep, it seems the perf bench is broken so the counts won't correlated if I revert this one: 92723ea0f11d perf bench: Fix two memory leaks detected with ASan it works for me again.. it seems to break -t option [root@dell-r440-01 perf]# ./perf bench sched messaging -g 1 -l 100 -t # Running 'sched/messaging' benchmark: RRRperf: CLIENT: ready write: Bad file descriptor Rperf: SENDER: write: Bad file descriptor Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Namhyung Kim Cc: Sohaib Mohamed Cc: Song Liu Link: https://lore.kernel.org/lkml/YZev7KClb%2Fud43Lc@krava/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index fa0ff4ce2b74..488f6e6ba1a5 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -223,8 +223,6 @@ static unsigned int group(pthread_t *pth, snd_ctx->out_fds[i] = fds[1]; if (!thread_mode) close(fds[0]); - - free(ctx); } /* Now we have all the fds, fork the senders */ @@ -241,8 +239,6 @@ static unsigned int group(pthread_t *pth, for (i = 0; i < num_fds; i++) close(snd_ctx->out_fds[i]); - free(snd_ctx); - /* Return number of children to reap */ return num_fds * 2; } From 71a16df164b23210d4dcaf35c70825f47d7c5599 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Sep 2021 16:09:08 -0300 Subject: [PATCH 189/253] tools headers UAPI: Sync s390 syscall table file changed by new futex_waitv syscall To pick the changes in these csets: 6c122360cf2f4c5a ("s390: wire up sys_futex_waitv system call") That add support for this new syscall in tools such as 'perf trace'. For instance, this is now possible (adapted from the x86_64 test output): # perf trace -e futex_waitv ^C# # perf trace -v -e futex_waitv event qualifier tracepoint filter: (common_pid != 807333 && common_pid != 3564) && (id == 449) ^C# # perf trace -v -e futex* --max-events 10 event qualifier tracepoint filter: (common_pid != 812168 && common_pid != 3564) && (id == 238 || id == 449) ? ( ): Timer/219310 ... [continued]: futex()) = -1 ETIMEDOUT (Connection timed out) 0.012 ( 0.002 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.024 ( 0.060 ms): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) = 0 0.086 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.088 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d424, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... 0.075 ( 0.005 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d420, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.169 ( 0.004 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d424, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.088 ( 0.089 ms): Timer/219310 ... [continued]: futex()) = 0 0.179 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.181 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... # That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ grep futex tools/perf/arch/s390/entry/syscalls/syscall.tbl 238 common futex sys_futex sys_futex_time32 422 32 futex_time64 - sys_futex 449 common futex_waitv sys_futex_waitv sys_futex_waitv $ This addresses this perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Acked-by: Heiko Carstens Cc: Adrian Hunter , Cc: Jiri Olsa Cc: Namhyung Kim Cc: Vasily Gorbik Link: https://lore.kernel.org/lkml/YZ%2F2qRW%2FTScYTP1U@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index df5261e5cfe1..ed9c5c2eafad 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv sys_futex_waitv From c29d9792607e67ed8a3f6e9db0d96836d885a8c5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 25 Nov 2021 09:14:57 +0200 Subject: [PATCH 190/253] perf inject: Fix itrace space allowed for new attributes The space allowed for new attributes can be too small if existing header information is large. That can happen, for example, if there are very many CPUs, due to having an event ID per CPU per event being stored in the header information. Fix by adding the existing header.data_offset. Also increase the extra space allowed to 8KiB and align to a 4KiB boundary for neatness. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20211125071457.2066863-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index bc5259db5fd9..b9d6306cc14e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -820,7 +820,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ - output_data_offset = 4096; + output_data_offset = roundup(8192 + session->header.data_offset, 4096); if (inject->strip) strip_init(inject); } From cba43fcf7aaf8369f80aac26cc2c50232c065a9e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Sep 2021 16:09:08 -0300 Subject: [PATCH 191/253] tools headers UAPI: Sync powerpc syscall table file changed by new futex_waitv syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes in this cset: a0eb2da92b715d0c ("futex: Wireup futex_waitv syscall") That add support for this new syscall in tools such as 'perf trace'. For instance, this is now possible (adapted from the x86_64 test output): # perf trace -e futex_waitv ^C# # perf trace -v -e futex_waitv event qualifier tracepoint filter: (common_pid != 807333 && common_pid != 3564) && (id == 449) ^C# # perf trace -v -e futex* --max-events 10 event qualifier tracepoint filter: (common_pid != 812168 && common_pid != 3564) && (id == 221 || id == 449) mmap size 528384B ? ( ): Timer/219310 ... [continued]: futex()) = -1 ETIMEDOUT (Connection timed out) 0.012 ( 0.002 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.024 ( 0.060 ms): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) = 0 0.086 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.088 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d424, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... 0.075 ( 0.005 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d420, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.169 ( 0.004 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d424, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.088 ( 0.089 ms): Timer/219310 ... [continued]: futex()) = 0 0.179 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.181 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... # That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ grep futex tools/perf/arch/powerpc/entry/syscalls/syscall.tbl 221 32 futex sys_futex_time32 221 64 futex sys_futex 221 spu futex sys_futex 422 32 futex_time64 sys_futex sys_futex 449 common futex_waitv sys_futex_waitv $ This addresses this perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Reviewed-by: Athira Rajeev Tested-by: Athira Rajeev Cc: Adrian Hunter , Cc: André Almeida Cc: Arnd Bergmann Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/YZ%2F1OU9mJuyS2HMa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 7bef917cc84e..15109af9d075 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -528,3 +528,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv From 4ffbe87e2d5b53bcb0213d8650bbe70bf942de6a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Nov 2021 16:12:29 -0800 Subject: [PATCH 192/253] perf tools: Fix SMT detection fast read path sysfs__read_int() returns 0 on success, and so the fast read path was always failing. Fixes: bb629484d924118e ("perf tools: Simplify checking if SMT is active.") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Konstantin Khlebnikov Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211124001231.3277836-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/smt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 20bacd5972ad..34f1b1b1176c 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,7 +15,7 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) goto done; ncpu = sysconf(_SC_NPROCESSORS_CONF); From 3d1d57debee2d342a47615707588b96658fabb85 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Nov 2021 10:12:41 -0300 Subject: [PATCH 193/253] tools build: Remove needless libpython-version feature check that breaks test-all fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 66dfdff03d196e51 ("perf tools: Add Python 3 support") we don't use the tools/build/feature/test-libpython-version.c version in any Makefile feature check: $ find tools/ -type f | xargs grep feature-libpython-version $ The only place where this was used was removed in 66dfdff03d196e51: - ifneq ($(feature-libpython-version), 1) - $(warning Python 3 is not yet supported; please set) - $(warning PYTHON and/or PYTHON_CONFIG appropriately.) - $(warning If you also have Python 2 installed, then) - $(warning try something like:) - $(warning $(and ,)) - $(warning $(and ,) make PYTHON=python2) - $(warning $(and ,)) - $(warning Otherwise, disable Python support entirely:) - $(warning $(and ,)) - $(warning $(and ,) make NO_LIBPYTHON=1) - $(warning $(and ,)) - $(error $(and ,)) - else - LDFLAGS += $(PYTHON_EMBED_LDFLAGS) - EXTLIBS += $(PYTHON_EMBED_LIBADD) - LANG_BINDINGS += $(obj-perf)python/perf.so - $(call detected,CONFIG_LIBPYTHON) - endif And nowadays we either build with PYTHON=python3 or just install the python3 devel packages and perf will build against it. But the leftover feature-libpython-version check made the fast path feature detection to break in all cases except when python2 devel files were installed: $ rpm -qa | grep python.*devel python3-devel-3.9.7-1.fc34.x86_64 $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; $ make -C tools/perf O=/tmp/build/perf install-bin make: Entering directory '/var/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j32' parallel build HOSTCC /tmp/build/perf/fixdep.o $ cat /tmp/build/perf/feature/test-all.make.output In file included from test-all.c:18: test-libpython-version.c:5:10: error: #error 5 | #error | ^~~~~ $ ldd ~/bin/perf | grep python libpython3.9.so.1.0 => /lib64/libpython3.9.so.1.0 (0x00007fda6dbcf000) $ As python3 is the norm these days, fix this by just removing the unused feature-libpython-version feature check, making the test-all fast path to work with the common case. With this: $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; $ make -C tools/perf O=/tmp/build/perf install-bin |& head make: Entering directory '/var/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j32' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] $ ldd ~/bin/perf | grep python libpython3.9.so.1.0 => /lib64/libpython3.9.so.1.0 (0x00007f58800b0000) $ cat /tmp/build/perf/feature/test-all.make.output $ Reviewed-by: James Clark Fixes: 66dfdff03d196e51 ("perf tools: Add Python 3 support") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jaroslav Škarvada Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/YaYmeeC6CS2b8OSz@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 - tools/build/feature/Makefile | 4 ---- tools/build/feature/test-all.c | 5 ----- tools/build/feature/test-libpython-version.c | 11 ----------- tools/perf/Makefile.config | 2 -- 5 files changed, 23 deletions(-) delete mode 100644 tools/build/feature/test-libpython-version.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 45a9a59828c3..ae61f464043a 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -48,7 +48,6 @@ FEATURE_TESTS_BASIC := \ numa_num_possible_cpus \ libperl \ libpython \ - libpython-version \ libslang \ libslang-include-subdir \ libtraceevent \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0a3244ad9673..1480910c792e 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -32,7 +32,6 @@ FILES= \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ - test-libpython-version.bin \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ @@ -227,9 +226,6 @@ $(OUTPUT)test-libperl.bin: $(OUTPUT)test-libpython.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) -$(OUTPUT)test-libpython-version.bin: - $(BUILD) - $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 0b243ce842be..5ffafb967b6e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -14,10 +14,6 @@ # include "test-libpython.c" #undef main -#define main main_test_libpython_version -# include "test-libpython-version.c" -#undef main - #define main main_test_libperl # include "test-libperl.c" #undef main @@ -177,7 +173,6 @@ int main(int argc, char *argv[]) { main_test_libpython(); - main_test_libpython_version(); main_test_libperl(); main_test_hello(); main_test_libelf(); diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c deleted file mode 100644 index 47714b942d4d..000000000000 --- a/tools/build/feature/test-libpython-version.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif - -int main(void) -{ - return 0; -} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index afd144725a0b..3df74cf5651a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -271,8 +271,6 @@ endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) -FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) -FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt From 6c481031c9f71e2b0ff9c49d21116a38ca671746 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 29 Nov 2021 12:23:39 +0100 Subject: [PATCH 194/253] perf test: Fix 'Simple expression parser' test on arch without CPU die topology info Some platforms do not have CPU die support, for example s390. Commit Cc: Ian Rogers Fixes: fdf1e29b6118c18f ("perf expr: Add metric literals for topology.") fails on s390: # perf test -Fv 7 ... # FAILED tests/expr.c:173 #num_dies >= #num_packages ---- end ---- Simple expression parser: FAILED! # Investigating this issue leads to these functions: build_cpu_topology() +--> has_die_topology(void) { struct utsname uts; if (uname(&uts) < 0) return false; if (strncmp(uts.machine, "x86_64", 6)) return false; .... } which always returns false on s390. The caller build_cpu_topology() checks has_die_topology() return value. On false the the struct cpu_topology::die_cpu_list is not contructed and has zero entries. This leads to the failing comparison: #num_dies >= #num_packages. s390 of course has a positive number of packages. Fix this and check if the function build_cpu_topology() did build up a die_cpus_list. The number of entries in this list should be larger than 0. If the number of list element is zero, the die_cpus_list has not been created and the check in function test__expr(): TEST_ASSERT_VAL("#num_dies >= #num_packages", \ num_dies >= num_packages) always fails. Output after: # perf test -Fv 7 7: Simple expression parser : --- start --- division by zero syntax error ---- end ---- Simple expression parser: Ok # Fixes: fdf1e29b6118c18f ("perf expr: Add metric literals for topology.") Signed-off-by: Thomas Richter Acked-by: Ian Rogers Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20211129112339.3003036-1-tmricht@linux.ibm.com [ Added comment in the added 'if (num_dies)' line about architectures not having die topology ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index c895de481fe1..d54c5371c6a6 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -169,7 +169,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0); TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies); TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0); - TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); + + if (num_dies) // Some platforms do not have CPU die support, for example s390 + TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); /* * Source count returns the number of events aggregating in a leader From 1aa79e57730980dfbabd44e7c0a12fbb56064cb6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 28 Nov 2021 00:58:10 -0800 Subject: [PATCH 195/253] perf test: Reset shadow counts before loading Otherwise load counting is an average. Without this change duration_time in test_memory_bandwidth will alter its value if an earlier test contains duration_time. This patch fixes an issue that's introduced in the proposed patch: https://lore.kernel.org/lkml/20211124015226.3317994-1-irogers@google.com/ in perf test "Parse and process metrics". Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211128085810.4027314-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 574b7e4efd3a..07b6f4ec024f 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -109,6 +109,7 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, struct evsel *evsel; u64 count; + perf_stat__reset_shadow_stats(); evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); perf_stat__update_shadow_stats(evsel, count, 0, st); From 4747395082abc67c700a75e4cf3b796e79c7cf3a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 18 Nov 2021 12:17:30 -0800 Subject: [PATCH 196/253] perf header: Fix memory leaks when processing feature headers These leaks were found with leak sanitizer running "perf pipe recording and injection test". In pipe mode feat_fd may hold onto an events struct that needs freeing. When string features are processed they may overwrite an already created string, so free this before the overwrite. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211118201730.2302927-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 79cce216727e..e3c1a532d059 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2321,6 +2321,7 @@ out: #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \ {\ + free(ff->ph->env.__feat_env); \ ff->ph->env.__feat_env = do_read_string(ff); \ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ } @@ -4124,6 +4125,7 @@ int perf_event__process_feature(struct perf_session *session, struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event; int type = fe->header.type; u64 feat = fe->feat_id; + int ret = 0; if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { pr_warning("invalid record type %d in pipe-mode\n", type); @@ -4141,11 +4143,13 @@ int perf_event__process_feature(struct perf_session *session, ff.size = event->header.size - sizeof(*fe); ff.ph = &session->header; - if (feat_ops[feat].process(&ff, NULL)) - return -1; + if (feat_ops[feat].process(&ff, NULL)) { + ret = -1; + goto out; + } if (!feat_ops[feat].print || !tool->show_feat_hdr) - return 0; + goto out; if (!feat_ops[feat].full_only || tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { @@ -4154,8 +4158,9 @@ int perf_event__process_feature(struct perf_session *session, fprintf(stdout, "# %s info available, use -I to display\n", feat_ops[feat].name); } - - return 0; +out: + free_event_desc(ff.events); + return ret; } size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) From f7c4e85bccea960203e5872553957511df86913e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 3 Dec 2021 19:32:34 +0000 Subject: [PATCH 197/253] perf bpf: Fix building perf with BUILD_BPF_SKEL=1 by default in more distros Arnaldo reported that building all his containers with BUILD_BPF_SKEL=1 to then make this the default he found problems in some distros where the system linux/bpf.h file was being used and lacked this: util/bpf_skel/bperf_leader.bpf.c:13:20: error: use of undeclared identifier 'BPF_F_PRESERVE_ELEMS' __uint(map_flags, BPF_F_PRESERVE_ELEMS); So use instead the vmlinux.h file generated by bpftool from BTF info. This fixed these as well, getting the build back working on debian:11, debian:experimental and ubuntu:21.10: In file included from In file included from util/bpf_skel/bperf_leader.bpf.cutil/bpf_skel/bpf_prog_profiler.bpf.c::33: : In file included from In file included from /usr/include/linux/bpf.h/usr/include/linux/bpf.h::1111: : /usr/include/linux/types.h/usr/include/linux/types.h::55::1010:: In file included from util/bpf_skel/bperf_follower.bpf.c:3fatal errorfatal error: : : In file included from /usr/include/linux/bpf.h:'asm/types.h' file not found11'asm/types.h' file not found: /usr/include/linux/types.h:5:10: fatal error: 'asm/types.h' file not found #include #include ^~~~~~~~~~~~~ ^~~~~~~~~~~~~ #include ^~~~~~~~~~~~~ 1 error generated. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Song Liu Tested-by: Athira Rajeev Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/CF175681-8101-43D1-ABDB-449E644BE986@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_skel/bperf_follower.bpf.c | 3 +-- tools/perf/util/bpf_skel/bperf_leader.bpf.c | 3 +-- tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index b8fa3cb2da23..4a6acfde1493 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include -#include +#include "vmlinux.h" #include #include #include "bperf.h" diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c index 4f70d1459e86..40d962b05863 100644 --- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include -#include +#include "vmlinux.h" #include #include #include "bperf.h" diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index ab12b4c4ece2..97037d3b3d9f 100644 --- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include +#include "vmlinux.h" #include #include From 5a897531e00243cebbcc4dbe4ab06cd559ccf53f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 3 Dec 2021 15:14:41 -0800 Subject: [PATCH 198/253] perf bpf_skel: Do not use typedef to avoid error on old clang When building bpf_skel with clang-10, typedef causes confusions like: libbpf: map 'prev_readings': unexpected def kind var. Fix this by removing the typedef. Fixes: 7fac83aaf2eecc9e ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Song Liu Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/BEF5C312-4331-4A60-AEC0-AD7617CB2BC4@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_skel/bperf.h | 14 -------------- tools/perf/util/bpf_skel/bperf_follower.bpf.c | 16 +++++++++++++--- tools/perf/util/bpf_skel/bperf_leader.bpf.c | 16 +++++++++++++--- 3 files changed, 26 insertions(+), 20 deletions(-) delete mode 100644 tools/perf/util/bpf_skel/bperf.h diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h deleted file mode 100644 index 186a5551ddb9..000000000000 --- a/tools/perf/util/bpf_skel/bperf.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -// Copyright (c) 2021 Facebook - -#ifndef __BPERF_STAT_H -#define __BPERF_STAT_H - -typedef struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(struct bpf_perf_event_value)); - __uint(max_entries, 1); -} reading_map; - -#endif /* __BPERF_STAT_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index 4a6acfde1493..f193998530d4 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -3,11 +3,21 @@ #include "vmlinux.h" #include #include -#include "bperf.h" #include "bperf_u.h" -reading_map diff_readings SEC(".maps"); -reading_map accum_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} accum_readings SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c index 40d962b05863..e2a2d4cd7779 100644 --- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -3,7 +3,6 @@ #include "vmlinux.h" #include #include -#include "bperf.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -12,8 +11,19 @@ struct { __uint(map_flags, BPF_F_PRESERVE_ELEMS); } events SEC(".maps"); -reading_map prev_readings SEC(".maps"); -reading_map diff_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} prev_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); SEC("raw_tp/sched_switch") int BPF_PROG(on_switch) From 94cddf1e9227a171b27292509d59691819c458db Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Nov 2021 20:16:54 +0900 Subject: [PATCH 199/253] can: pch_can: pch_can_rx_normal: fix use after free After calling netif_receive_skb(skb), dereferencing skb is unsafe. Especially, the can_frame cf which aliases skb memory is dereferenced just after the call netif_receive_skb(skb). Reordering the lines solves the issue. Fixes: b21d18b51b31 ("can: Topcliff: Add PCH_CAN driver.") Link: https://lore.kernel.org/all/20211123111654.621610-1-mailhol.vincent@wanadoo.fr Cc: stable@vger.kernel.org Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/pch_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index 92a54a5fd4c5..964c8a09226a 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -692,11 +692,11 @@ static int pch_can_rx_normal(struct net_device *ndev, u32 obj_num, int quota) cf->data[i + 1] = data_reg >> 8; } - netif_receive_skb(skb); rcv_pkts++; stats->rx_packets++; quota--; stats->rx_bytes += cf->len; + netif_receive_skb(skb); pch_fifo_thresh(priv, obj_num); obj_num++; From 3ec6ca6b1a8e64389f0212b5a1b0f6fed1909e45 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Nov 2021 17:50:41 +0300 Subject: [PATCH 200/253] can: sja1000: fix use after free in ems_pcmcia_add_card() If the last channel is not available then "dev" is freed. Fortunately, we can just use "pdev->irq" instead. Also we should check if at least one channel was set up. Fixes: fd734c6f25ae ("can/sja1000: add driver for EMS PCMCIA card") Link: https://lore.kernel.org/all/20211124145041.GB13656@kili Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Acked-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/ems_pcmcia.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/sja1000/ems_pcmcia.c b/drivers/net/can/sja1000/ems_pcmcia.c index e21b169c14c0..4642b6d4aaf7 100644 --- a/drivers/net/can/sja1000/ems_pcmcia.c +++ b/drivers/net/can/sja1000/ems_pcmcia.c @@ -234,7 +234,12 @@ static int ems_pcmcia_add_card(struct pcmcia_device *pdev, unsigned long base) free_sja1000dev(dev); } - err = request_irq(dev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, + if (!card->channels) { + err = -ENODEV; + goto failure_cleanup; + } + + err = request_irq(pdev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, DRV_NAME, card); if (!err) return 0; From f58ac1adc76b5beda43c64ef359056077df4d93a Mon Sep 17 00:00:00 2001 From: Brian Silverman Date: Mon, 29 Nov 2021 14:26:28 -0800 Subject: [PATCH 201/253] can: m_can: Disable and ignore ELO interrupt With the design of this driver, this condition is often triggered. However, the counter that this interrupt indicates an overflow is never read either, so overflowing is harmless. On my system, when a CAN bus starts flapping up and down, this locks up the whole system with lots of interrupts and printks. Specifically, this interrupt indicates the CEL field of ECR has overflowed. All reads of ECR mask out CEL. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Link: https://lore.kernel.org/all/20211129222628.7490-1-brian.silverman@bluerivertech.com Cc: stable@vger.kernel.org Signed-off-by: Brian Silverman Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2470c47b2e31..91be87c4f4d3 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -204,16 +204,16 @@ enum m_can_reg { /* Interrupts for version 3.0.x */ #define IR_ERR_LEC_30X (IR_STE | IR_FOE | IR_ACKE | IR_BE | IR_CRCE) -#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_30X (IR_ERR_STATE | IR_ERR_BUS_30X) /* Interrupts for version >= 3.1.x */ #define IR_ERR_LEC_31X (IR_PED | IR_PEA) -#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_31X (IR_ERR_STATE | IR_ERR_BUS_31X) /* Interrupt Line Select (ILS) */ @@ -810,8 +810,6 @@ static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) { if (irqstatus & IR_WDI) netdev_err(dev, "Message RAM Watchdog event due to missing READY\n"); - if (irqstatus & IR_ELO) - netdev_err(dev, "Error Logging Overflow\n"); if (irqstatus & IR_BEU) netdev_err(dev, "Bit Error Uncorrected\n"); if (irqstatus & IR_BEC) From 31cb32a590d62b18f69a9a6d433f4e69c74fdd56 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sun, 7 Nov 2021 14:07:55 +0900 Subject: [PATCH 202/253] can: m_can: m_can_read_fifo: fix memory leak in error branch In m_can_read_fifo(), if the second call to m_can_fifo_read() fails, the function jump to the out_fail label and returns without calling m_can_receive_skb(). This means that the skb previously allocated by alloc_can_skb() is not freed. In other terms, this is a memory leak. This patch adds a goto label to destroy the skb if an error occurs. Issue was found with GCC -fanalyzer, please follow the link below for details. Fixes: e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors") Link: https://lore.kernel.org/all/20211107050755.70655-1-mailhol.vincent@wanadoo.fr Cc: stable@vger.kernel.org Cc: Matt Kline Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 91be87c4f4d3..e330b4c121bf 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -517,7 +517,7 @@ static int m_can_read_fifo(struct net_device *dev, u32 rxfs) err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DATA, cf->data, DIV_ROUND_UP(cf->len, 4)); if (err) - goto out_fail; + goto out_free_skb; } /* acknowledge rx fifo 0 */ @@ -532,6 +532,8 @@ static int m_can_read_fifo(struct net_device *dev, u32 rxfs) return 0; +out_free_skb: + kfree_skb(skb); out_fail: netdev_err(dev, "FIFO read returned %d\n", err); return err; From d737de2d7cc3efdacbf17d4e22efc75697bd76d9 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 18 Nov 2021 15:40:11 +0100 Subject: [PATCH 203/253] can: m_can: pci: fix iomap_read_fifo() and iomap_write_fifo() The same fix that was previously done in m_can_platform in commit 99d173fbe894 ("can: m_can: fix iomap_read_fifo() and iomap_write_fifo()") is required in m_can_pci as well to make iomap_read_fifo() and iomap_write_fifo() work for val_count > 1. Fixes: 812270e5445b ("can: m_can: Batch FIFO writes during CAN transmit") Fixes: 1aa6772f64b4 ("can: m_can: Batch FIFO reads during CAN receive") Link: https://lore.kernel.org/all/20211118144011.10921-1-matthias.schiffer@ew.tq-group.com Cc: stable@vger.kernel.org Cc: Matt Kline Signed-off-by: Matthias Schiffer Tested-by: Jarkko Nikula Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 89cc3d41e952..d72c294ac4d3 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -42,8 +42,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg) static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count) { struct m_can_pci_priv *priv = cdev_to_priv(cdev); + void __iomem *src = priv->base + offset; - ioread32_rep(priv->base + offset, val, val_count); + while (val_count--) { + *(unsigned int *)val = ioread32(src); + val += 4; + src += 4; + } return 0; } @@ -61,8 +66,13 @@ static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, const void *val, size_t val_count) { struct m_can_pci_priv *priv = cdev_to_priv(cdev); + void __iomem *dst = priv->base + offset; - iowrite32_rep(priv->base + offset, val, val_count); + while (val_count--) { + iowrite32(*(unsigned int *)val, dst); + val += 4; + dst += 4; + } return 0; } From 8c03b8bff765ac4146342ef90931bb50e788c758 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:49 +0100 Subject: [PATCH 204/253] can: m_can: pci: fix incorrect reference clock rate When testing the CAN controller on our Ekhart Lake hardware, we determined that all communication was running with twice the configured bitrate. Changing the reference clock rate from 100MHz to 200MHz fixed this. Intel's support has confirmed to us that 200MHz is indeed the correct clock rate. Fixes: cab7ffc0324f ("can: m_can: add PCI glue driver for Intel Elkhart Lake") Link: https://lore.kernel.org/all/c9cf3995f45c363e432b3ae8eb1275e54f009fc8.1636967198.git.matthias.schiffer@ew.tq-group.com Cc: stable@vger.kernel.org Signed-off-by: Matthias Schiffer Acked-by: Jarkko Nikula Reviewed-by: Jarkko Nikula Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index d72c294ac4d3..8f184a852a0a 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -18,7 +18,7 @@ #define M_CAN_PCI_MMIO_BAR 0 -#define M_CAN_CLOCK_FREQ_EHL 100000000 +#define M_CAN_CLOCK_FREQ_EHL 200000000 #define CTL_CSR_INT_CTL_OFFSET 0x508 struct m_can_pci_priv { From ea768b2ffec6cc9c3e17c37ef75d0539b8f89ff5 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:50 +0100 Subject: [PATCH 205/253] Revert "can: m_can: remove support for custom bit timing" The timing limits specified by the Elkhart Lake CPU datasheets do not match the defaults. Let's reintroduce the support for custom bit timings. This reverts commit 0ddd83fbebbc5537f9d180d31f659db3564be708. Link: https://lore.kernel.org/all/00c9e2596b1a548906921a574d4ef7a03c0dace0.1636967198.git.matthias.schiffer@ew.tq-group.com Signed-off-by: Matthias Schiffer Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 24 ++++++++++++++++++------ drivers/net/can/m_can/m_can.h | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index e330b4c121bf..c2a8421e7845 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1494,20 +1494,32 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) case 30: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = &m_can_bittiming_const_30X; - cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X; + cdev->can.bittiming_const = cdev->bit_timing ? + cdev->bit_timing : &m_can_bittiming_const_30X; + + cdev->can.data_bittiming_const = cdev->data_timing ? + cdev->data_timing : + &m_can_data_bittiming_const_30X; break; case 31: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = &m_can_bittiming_const_31X; - cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = cdev->bit_timing ? + cdev->bit_timing : &m_can_bittiming_const_31X; + + cdev->can.data_bittiming_const = cdev->data_timing ? + cdev->data_timing : + &m_can_data_bittiming_const_31X; break; case 32: case 33: /* Support both MCAN version v3.2.x and v3.3.0 */ - cdev->can.bittiming_const = &m_can_bittiming_const_31X; - cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = cdev->bit_timing ? + cdev->bit_timing : &m_can_bittiming_const_31X; + + cdev->can.data_bittiming_const = cdev->data_timing ? + cdev->data_timing : + &m_can_data_bittiming_const_31X; cdev->can.ctrlmode_supported |= (m_can_niso_supported(cdev) ? diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index d18b515e6ccc..ad063b101411 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -85,6 +85,9 @@ struct m_can_classdev { struct sk_buff *tx_skb; struct phy *transceiver; + struct can_bittiming_const *bit_timing; + struct can_bittiming_const *data_timing; + struct m_can_ops *ops; int version; From ea22ba40debee29ee7257c42002409899e9311c1 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:51 +0100 Subject: [PATCH 206/253] can: m_can: make custom bittiming fields const The assigned timing structs will be defined a const anyway, so we can avoid a few casts by declaring the struct fields as const as well. Link: https://lore.kernel.org/all/4508fa4e639164b2584c49a065d90c78a91fa568.1636967198.git.matthias.schiffer@ew.tq-group.com Signed-off-by: Matthias Schiffer Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index ad063b101411..2c5d40997168 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -85,8 +85,8 @@ struct m_can_classdev { struct sk_buff *tx_skb; struct phy *transceiver; - struct can_bittiming_const *bit_timing; - struct can_bittiming_const *data_timing; + const struct can_bittiming_const *bit_timing; + const struct can_bittiming_const *data_timing; struct m_can_ops *ops; From ea4c1787685dbf9842046f05b6390b6901ee6ba2 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:52 +0100 Subject: [PATCH 207/253] can: m_can: pci: use custom bit timings for Elkhart Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The relevant datasheet [1] specifies nonstandard limits for the bit timing parameters. While it is unclear what the exact effect of violating these limits is, it seems like a good idea to adhere to the documentation. [1] Intel Atom® x6000E Series, and Intel® Pentium® and Celeron® N and J Series Processors for IoT Applications Datasheet, Volume 2 (Book 3 of 3), July 2021, Revision 001 Fixes: cab7ffc0324f ("can: m_can: add PCI glue driver for Intel Elkhart Lake") Link: https://lore.kernel.org/all/9eba5d7c05a48ead4024ffa6e5926f191d8c6b38.1636967198.git.matthias.schiffer@ew.tq-group.com Signed-off-by: Matthias Schiffer Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 48 ++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 8f184a852a0a..b56a54d6c5a9 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -18,9 +18,14 @@ #define M_CAN_PCI_MMIO_BAR 0 -#define M_CAN_CLOCK_FREQ_EHL 200000000 #define CTL_CSR_INT_CTL_OFFSET 0x508 +struct m_can_pci_config { + const struct can_bittiming_const *bit_timing; + const struct can_bittiming_const *data_timing; + unsigned int clock_freq; +}; + struct m_can_pci_priv { struct m_can_classdev cdev; @@ -84,9 +89,40 @@ static struct m_can_ops m_can_pci_ops = { .read_fifo = iomap_read_fifo, }; +static const struct can_bittiming_const m_can_bittiming_const_ehl = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 64, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 128, + .sjw_max = 128, + .brp_min = 1, + .brp_max = 512, + .brp_inc = 1, +}; + +static const struct can_bittiming_const m_can_data_bittiming_const_ehl = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 16, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 32, + .brp_inc = 1, +}; + +static const struct m_can_pci_config m_can_pci_ehl = { + .bit_timing = &m_can_bittiming_const_ehl, + .data_timing = &m_can_data_bittiming_const_ehl, + .clock_freq = 200000000, +}; + static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) { struct device *dev = &pci->dev; + const struct m_can_pci_config *cfg; struct m_can_classdev *mcan_class; struct m_can_pci_priv *priv; void __iomem *base; @@ -114,6 +150,8 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (!mcan_class) return -ENOMEM; + cfg = (const struct m_can_pci_config *)id->driver_data; + priv = cdev_to_priv(mcan_class); priv->base = base; @@ -125,7 +163,9 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); mcan_class->pm_clock_support = 1; - mcan_class->can.clock.freq = id->driver_data; + mcan_class->bit_timing = cfg->bit_timing; + mcan_class->data_timing = cfg->data_timing; + mcan_class->can.clock.freq = cfg->clock_freq; mcan_class->ops = &m_can_pci_ops; pci_set_drvdata(pci, mcan_class); @@ -178,8 +218,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_ops, m_can_pci_suspend, m_can_pci_resume); static const struct pci_device_id m_can_pci_id_table[] = { - { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, }, - { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, }, + { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, }, + { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, }, { } /* Terminating Entry */ }; MODULE_DEVICE_TABLE(pci, m_can_pci_id_table); From 598ad0bd09329818ee041cb3e4b60ba0a70cb1ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Dec 2021 09:53:24 +0000 Subject: [PATCH 208/253] netfs: Fix lockdep warning from taking sb_writers whilst holding mmap_lock Taking sb_writers whilst holding mmap_lock isn't allowed and will result in a lockdep warning like that below. The problem comes from cachefiles needing to take the sb_writers lock in order to do a write to the cache, but being asked to do this by netfslib called from readpage, readahead or write_begin[1]. Fix this by always offloading the write to the cache off to a worker thread. The main thread doesn't need to wait for it, so deadlock can be avoided. This can be tested by running the quick xfstests on something like afs or ceph with lockdep enabled. WARNING: possible circular locking dependency detected 5.15.0-rc1-build2+ #292 Not tainted ------------------------------------------------------ holetest/65517 is trying to acquire lock: ffff88810c81d730 (mapping.invalidate_lock#3){.+.+}-{3:3}, at: filemap_fault+0x276/0x7a5 but task is already holding lock: ffff8881595b53e8 (&mm->mmap_lock#2){++++}-{3:3}, at: do_user_addr_fault+0x28d/0x59c which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&mm->mmap_lock#2){++++}-{3:3}: validate_chain+0x3c4/0x4a8 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b __might_fault+0x87/0xb1 strncpy_from_user+0x25/0x18c removexattr+0x7c/0xe5 __do_sys_fremovexattr+0x73/0x96 do_syscall_64+0x67/0x7a entry_SYSCALL_64_after_hwframe+0x44/0xae -> #1 (sb_writers#10){.+.+}-{0:0}: validate_chain+0x3c4/0x4a8 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b cachefiles_write+0x2b3/0x4bb netfs_rreq_do_write_to_cache+0x3b5/0x432 netfs_readpage+0x2de/0x39d filemap_read_page+0x51/0x94 filemap_get_pages+0x26f/0x413 filemap_read+0x182/0x427 new_sync_read+0xf0/0x161 vfs_read+0x118/0x16e ksys_read+0xb8/0x12e do_syscall_64+0x67/0x7a entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (mapping.invalidate_lock#3){.+.+}-{3:3}: check_noncircular+0xe4/0x129 check_prev_add+0x16b/0x3a4 validate_chain+0x3c4/0x4a8 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b down_read+0x40/0x4a filemap_fault+0x276/0x7a5 __do_fault+0x96/0xbf do_fault+0x262/0x35a __handle_mm_fault+0x171/0x1b5 handle_mm_fault+0x12a/0x233 do_user_addr_fault+0x3d2/0x59c exc_page_fault+0x85/0xa5 asm_exc_page_fault+0x1e/0x30 other info that might help us debug this: Chain exists of: mapping.invalidate_lock#3 --> sb_writers#10 --> &mm->mmap_lock#2 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_lock#2); lock(sb_writers#10); lock(&mm->mmap_lock#2); lock(mapping.invalidate_lock#3); *** DEADLOCK *** 1 lock held by holetest/65517: #0: ffff8881595b53e8 (&mm->mmap_lock#2){++++}-{3:3}, at: do_user_addr_fault+0x28d/0x59c stack backtrace: CPU: 0 PID: 65517 Comm: holetest Not tainted 5.15.0-rc1-build2+ #292 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: dump_stack_lvl+0x45/0x59 check_noncircular+0xe4/0x129 ? print_circular_bug+0x207/0x207 ? validate_chain+0x461/0x4a8 ? add_chain_block+0x88/0xd9 ? hlist_add_head_rcu+0x49/0x53 check_prev_add+0x16b/0x3a4 validate_chain+0x3c4/0x4a8 ? check_prev_add+0x3a4/0x3a4 ? mark_lock+0xa5/0x1c6 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b ? filemap_fault+0x276/0x7a5 ? rcu_read_unlock+0x59/0x59 ? add_to_page_cache_lru+0x13c/0x13c ? lock_is_held_type+0x7b/0xd3 down_read+0x40/0x4a ? filemap_fault+0x276/0x7a5 filemap_fault+0x276/0x7a5 ? pagecache_get_page+0x2dd/0x2dd ? __lock_acquire+0x8bc/0x949 ? pte_offset_kernel.isra.0+0x6d/0xc3 __do_fault+0x96/0xbf ? do_fault+0x124/0x35a do_fault+0x262/0x35a ? handle_pte_fault+0x1c1/0x20d __handle_mm_fault+0x171/0x1b5 ? handle_pte_fault+0x20d/0x20d ? __lock_release+0x151/0x254 ? mark_held_locks+0x1f/0x78 ? rcu_read_unlock+0x3a/0x59 handle_mm_fault+0x12a/0x233 do_user_addr_fault+0x3d2/0x59c ? pgtable_bad+0x70/0x70 ? rcu_read_lock_bh_held+0xab/0xab exc_page_fault+0x85/0xa5 ? asm_exc_page_fault+0x8/0x30 asm_exc_page_fault+0x1e/0x30 RIP: 0033:0x40192f Code: ff 48 89 c3 48 8b 05 50 28 00 00 48 85 ed 7e 23 31 d2 4b 8d 0c 2f eb 0a 0f 1f 00 48 8b 05 39 28 00 00 48 0f af c2 48 83 c2 01 <48> 89 1c 01 48 39 d5 7f e8 8b 0d f2 27 00 00 31 c0 85 c9 74 0e 8b RSP: 002b:00007f9931867eb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00007f9931868700 RCX: 00007f993206ac00 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00007ffc13e06ee0 RBP: 0000000000000100 R08: 0000000000000000 R09: 00007f9931868700 R10: 00007f99318689d0 R11: 0000000000000202 R12: 00007ffc13e06ee0 R13: 0000000000000c00 R14: 00007ffc13e06e00 R15: 00007f993206a000 Fixes: 726218fdc22c ("netfs: Define an interface to talk to a cache") Signed-off-by: David Howells Tested-by: Jeff Layton cc: Jan Kara cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210922110420.GA21576@quack2.suse.cz/ [1] Link: https://lore.kernel.org/r/163887597541.1596626.2668163316598972956.stgit@warthog.procyon.org.uk/ # v1 --- fs/netfs/read_helper.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 7046f9bdd8dc..7c6e199618af 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -354,16 +354,11 @@ static void netfs_rreq_write_to_cache_work(struct work_struct *work) netfs_rreq_do_write_to_cache(rreq); } -static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq, - bool was_async) +static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq) { - if (was_async) { - rreq->work.func = netfs_rreq_write_to_cache_work; - if (!queue_work(system_unbound_wq, &rreq->work)) - BUG(); - } else { - netfs_rreq_do_write_to_cache(rreq); - } + rreq->work.func = netfs_rreq_write_to_cache_work; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); } /* @@ -558,7 +553,7 @@ again: wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags)) - return netfs_rreq_write_to_cache(rreq, was_async); + return netfs_rreq_write_to_cache(rreq); netfs_rreq_completed(rreq, was_async); } From 3cfef1b612e15a0c2f5b1c9d3f3f31ad72d56fcd Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Tue, 7 Dec 2021 11:14:49 +0800 Subject: [PATCH 209/253] netfs: fix parameter of cleanup() The order of these two parameters is just reversed. gcc didn't warn on that, probably because 'void *' can be converted from or to other pointer types without warning. Cc: stable@vger.kernel.org Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Fixes: e1b1240c1ff5 ("netfs: Add write_begin helper") Signed-off-by: Jeffle Xu Signed-off-by: David Howells Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20211207031449.100510-1-jefflexu@linux.alibaba.com/ # v1 --- fs/netfs/read_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 7c6e199618af..75c76cbb27cc 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -955,7 +955,7 @@ int netfs_readpage(struct file *file, rreq = netfs_alloc_read_request(ops, netfs_priv, file); if (!rreq) { if (netfs_priv) - ops->cleanup(netfs_priv, folio_file_mapping(folio)); + ops->cleanup(folio_file_mapping(folio), netfs_priv); folio_unlock(folio); return -ENOMEM; } @@ -1186,7 +1186,7 @@ have_folio: goto error; have_folio_no_wait: if (netfs_priv) - ops->cleanup(netfs_priv, mapping); + ops->cleanup(mapping, netfs_priv); *_folio = folio; _leave(" = 0"); return 0; @@ -1197,7 +1197,7 @@ error: folio_unlock(folio); folio_put(folio); if (netfs_priv) - ops->cleanup(netfs_priv, mapping); + ops->cleanup(mapping, netfs_priv); _leave(" = %d", ret); return ret; } From d17b9737c2bc09b4ac6caf469826e5a7ce3ffab7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Dec 2021 11:24:16 +0300 Subject: [PATCH 210/253] net/qla3xxx: fix an error code in ql_adapter_up() The ql_wait_for_drvr_lock() fails and returns false, then this function should return an error code instead of returning success. The other problem is that the success path prints an error message netdev_err(ndev, "Releasing driver lock\n"); Delete that and re-order the code a little to make it more clear. Fixes: 5a4faa873782 ("[PATCH] qla3xxx NIC driver") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211207082416.GA16110@kili Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qla3xxx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 1e6d72adfe43..71523d747e93 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3480,20 +3480,19 @@ static int ql_adapter_up(struct ql3_adapter *qdev) spin_lock_irqsave(&qdev->hw_lock, hw_flags); - err = ql_wait_for_drvr_lock(qdev); - if (err) { - err = ql_adapter_initialize(qdev); - if (err) { - netdev_err(ndev, "Unable to initialize adapter\n"); - goto err_init; - } - netdev_err(ndev, "Releasing driver lock\n"); - ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); - } else { + if (!ql_wait_for_drvr_lock(qdev)) { netdev_err(ndev, "Could not acquire driver lock\n"); + err = -ENODEV; goto err_lock; } + err = ql_adapter_initialize(qdev); + if (err) { + netdev_err(ndev, "Unable to initialize adapter\n"); + goto err_init; + } + ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); + spin_unlock_irqrestore(&qdev->hw_lock, hw_flags); set_bit(QL_ADAPTER_UP, &qdev->flags); From f23ab04dd6f703e282bb2d51fe3ae14f4b88a628 Mon Sep 17 00:00:00 2001 From: Yahui Cao Date: Wed, 5 May 2021 14:18:00 -0700 Subject: [PATCH 211/253] ice: fix FDIR init missing when reset VF When VF is being reset, ice_reset_vf() will be called and FDIR resource should be released and initialized again. Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Signed-off-by: Yahui Cao Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 217ff5e9a6f1..c2431bc9d9ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1617,6 +1617,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_vc_set_default_allowlist(vf); ice_vf_fdir_exit(vf); + ice_vf_fdir_init(vf); /* clean VF control VSI when resetting VFs since it should be * setup only when VF creates its first FDIR rule. */ @@ -1747,6 +1748,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) } ice_vf_fdir_exit(vf); + ice_vf_fdir_init(vf); /* clean VF control VSI when resetting VF since it should be setup * only when VF creates its first FDIR rule. */ From 2657e16d8c52fb6ffc7250b0b6536f93886e32d6 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Mon, 12 Jul 2021 07:54:25 -0400 Subject: [PATCH 212/253] ice: rearm other interrupt cause register after enabling VFs The other interrupt cause register (OICR), global interrupt 0, is disabled when enabling VFs to prevent handling VFLR. If the OICR is not rearmed then the VF cannot communicate with the PF. Rearm the OICR after enabling VFs. Fixes: 916c7fdf5e93 ("ice: Separate VF VSI initialization/creation from reset flow") Signed-off-by: Paul Greenwalt Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index c2431bc9d9ce..6427e7ec93de 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2023,6 +2023,10 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) if (ret) goto err_unroll_sriov; + /* rearm global interrupts */ + if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) + ice_irq_dynamic_ena(hw, NULL, NULL); + return 0; err_unroll_sriov: From 6d39ea19b0fb6cc72427c862b32d39f5af468be3 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 12 Oct 2021 13:31:21 -0700 Subject: [PATCH 213/253] ice: Fix problems with DSCP QoS implementation The patch that implemented DSCP QoS implementation removed a bandwidth check that was used to check for a specific condition caused by some corner cases. This check should not of been removed. The same patch also added a check for when the DCBx state could be changed in relation to DSCP, but the check was erroneously added nested in a check for CEE mode, which made the check useless. Fix these problems by re-adding the bandwidth check and relocating the DSCP mode check earlier in the function that changes DCBx state in the driver. Fixes: 2a87bd73e50d ("ice: Add DSCP support") Reported-by: kernel test robot Signed-off-by: Dave Ertman Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dcb_nl.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 7fdeb411b6df..3eb01731e496 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -97,6 +97,9 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) new_cfg->etscfg.maxtcs = pf->hw.func_caps.common_cap.maxtc; + if (!bwcfg) + new_cfg->etscfg.tcbwtable[0] = 100; + if (!bwrec) new_cfg->etsrec.tcbwtable[0] = 100; @@ -167,15 +170,18 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) if (mode == pf->dcbx_cap) return ICE_DCB_NO_HW_CHG; - pf->dcbx_cap = mode; qos_cfg = &pf->hw.port_info->qos_cfg; - if (mode & DCB_CAP_DCBX_VER_CEE) { - if (qos_cfg->local_dcbx_cfg.pfc_mode == ICE_QOS_MODE_DSCP) - return ICE_DCB_NO_HW_CHG; + + /* DSCP configuration is not DCBx negotiated */ + if (qos_cfg->local_dcbx_cfg.pfc_mode == ICE_QOS_MODE_DSCP) + return ICE_DCB_NO_HW_CHG; + + pf->dcbx_cap = mode; + + if (mode & DCB_CAP_DCBX_VER_CEE) qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE; - } else { + else qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE; - } dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode); return ICE_DCB_HW_CHG_RST; From 28dc1b86f8ea9fd6f4c9e0b363db73ecabf84e22 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 22 Oct 2021 17:28:17 -0700 Subject: [PATCH 214/253] ice: ignore dropped packets during init If the hardware is constantly receiving unicast or broadcast packets during driver load, the device previously counted many GLV_RDPC (VSI dropped packets) events during init. This causes confusing dropped packet statistics during driver load. The dropped packets counter incrementing does stop once the driver finishes loading. Avoid this problem by baselining our statistics at the end of driver open instead of the end of probe. Fixes: cdedef59deb0 ("ice: Configure VSIs for Tx/Rx") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4d1fc48c9744..c6d6ce52e2ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5881,6 +5881,9 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } + /* clear this now, and the first stats read will be used as baseline */ + vsi->stat_offsets_loaded = false; + ice_service_task_schedule(pf); return 0; From 0e32ff024035b693a3304b3ffe30fba58e2ab48c Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 16 Nov 2021 11:24:26 +0100 Subject: [PATCH 215/253] ice: fix choosing UDP header type In tunnels packet there can be two UDP headers: - outer which for hw should be mark as ICE_UDP_OF - inner which for hw should be mark as ICE_UDP_ILOS or as ICE_TCP_IL if inner header is of TCP type In none tunnels packet header can be: - UDP, which for hw should be mark as ICE_UDP_ILOS - TCP, which for hw should be mark as ICE_TCP_IL Change incorrect ICE_UDP_OF for none tunnel packets to ICE_UDP_ILOS. ICE_UDP_OF is incorrect for none tunnel packets and setting it leads to error from hw while adding this kind of recipe. In summary, for tunnel outer port type should always be set to ICE_UDP_OF, for none tunnel outer and tunnel inner it should always be set to ICE_UDP_ILOS. Fixes: 9e300987d4a8 ("ice: VXLAN and Geneve TC support") Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 27 ++++++++------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index e5d23feb6701..384439a267ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -74,21 +74,13 @@ static enum ice_protocol_type ice_proto_type_from_ipv6(bool inner) return inner ? ICE_IPV6_IL : ICE_IPV6_OFOS; } -static enum ice_protocol_type -ice_proto_type_from_l4_port(bool inner, u16 ip_proto) +static enum ice_protocol_type ice_proto_type_from_l4_port(u16 ip_proto) { - if (inner) { - switch (ip_proto) { - case IPPROTO_UDP: - return ICE_UDP_ILOS; - } - } else { - switch (ip_proto) { - case IPPROTO_TCP: - return ICE_TCP_IL; - case IPPROTO_UDP: - return ICE_UDP_OF; - } + switch (ip_proto) { + case IPPROTO_TCP: + return ICE_TCP_IL; + case IPPROTO_UDP: + return ICE_UDP_ILOS; } return 0; @@ -191,8 +183,9 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, i++; } - if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) { - list[i].type = ice_proto_type_from_l4_port(false, hdr->l3_key.ip_proto); + if ((flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) && + hdr->l3_key.ip_proto == IPPROTO_UDP) { + list[i].type = ICE_UDP_OF; list[i].h_u.l4_hdr.dst_port = hdr->l4_key.dst_port; list[i].m_u.l4_hdr.dst_port = hdr->l4_mask.dst_port; i++; @@ -317,7 +310,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, ICE_TC_FLWR_FIELD_SRC_L4_PORT)) { struct ice_tc_l4_hdr *l4_key, *l4_mask; - list[i].type = ice_proto_type_from_l4_port(inner, headers->l3_key.ip_proto); + list[i].type = ice_proto_type_from_l4_port(headers->l3_key.ip_proto); l4_key = &headers->l4_key; l4_mask = &headers->l4_mask; From de6acd1cdd4d38823b7f4adae82e8a7d62993354 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 22 Nov 2021 16:39:25 +0100 Subject: [PATCH 216/253] ice: fix adding different tunnels Adding filters with the same values inside for VXLAN and Geneve causes HW error, because it looks exactly the same. To choose between different type of tunnels new recipe is needed. Add storing tunnel types in creating recipes function and start checking it in finding function. Change getting open tunnels function to return port on correct tunnel type. This is needed to copy correct port to dummy packet. Block user from adding enc_dst_port via tc flower, because VXLAN and Geneve filters can be created only with destination port which was previously opened. Fixes: 8b032a55c1bd5 ("ice: low level support for tunnels") Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_ethtool_fdir.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_fdir.c | 2 +- .../net/ethernet/intel/ice/ice_flex_pipe.c | 7 +++++-- .../net/ethernet/intel/ice/ice_flex_pipe.h | 3 ++- drivers/net/ethernet/intel/ice/ice_switch.c | 21 ++++++++++++------- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 3 ++- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 38960bcc384c..b6e7f47c8c78 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1268,7 +1268,7 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool is_tun = tun == ICE_FD_HW_SEG_TUN; int err; - if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num)) + if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num, TNL_ALL)) continue; err = ice_fdir_write_fltr(pf, input, add, is_tun); if (err) @@ -1652,7 +1652,7 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) } /* return error if not an update and no available filters */ - fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1; + fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port, TNL_ALL) ? 2 : 1; if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) && ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) { dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index cbd8424631e3..4dca009bdd50 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -924,7 +924,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len); loc = pkt; } else { - if (!ice_get_open_tunnel_port(hw, &tnl_port)) + if (!ice_get_open_tunnel_port(hw, &tnl_port, TNL_ALL)) return ICE_ERR_DOES_NOT_EXIST; if (!ice_fdir_pkt[idx].tun_pkt) return ICE_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 23cfcceb1536..6ad1c2559724 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1899,9 +1899,11 @@ static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) * ice_get_open_tunnel_port - retrieve an open tunnel port * @hw: pointer to the HW structure * @port: returns open port + * @type: type of tunnel, can be TNL_LAST if it doesn't matter */ bool -ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port) +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port, + enum ice_tunnel_type type) { bool res = false; u16 i; @@ -1909,7 +1911,8 @@ ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port) mutex_lock(&hw->tnl_lock); for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port) { + if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port && + (type == TNL_LAST || type == hw->tnl.tbl[i].type)) { *port = hw->tnl.tbl[i].port; res = true; break; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 344c2637facd..a2863f38fd1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -33,7 +33,8 @@ enum ice_status ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt, unsigned long *bm, struct list_head *fv_list); bool -ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port); +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port, + enum ice_tunnel_type type); int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 793f4a9fc2cd..183d93033890 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -3796,10 +3796,13 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { * ice_find_recp - find a recipe * @hw: pointer to the hardware structure * @lkup_exts: extension sequence to match + * @tun_type: type of recipe tunnel * * Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found. */ -static u16 ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts) +static u16 +ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts, + enum ice_sw_tunnel_type tun_type) { bool refresh_required = true; struct ice_sw_recipe *recp; @@ -3860,8 +3863,9 @@ static u16 ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts) } /* If for "i"th recipe the found was never set to false * then it means we found our match + * Also tun type of recipe needs to be checked */ - if (found) + if (found && recp[i].tun_type == tun_type) return i; /* Return the recipe ID */ } } @@ -4651,11 +4655,12 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, } /* Look for a recipe which matches our requested fv / mask list */ - *rid = ice_find_recp(hw, lkup_exts); + *rid = ice_find_recp(hw, lkup_exts, rinfo->tun_type); if (*rid < ICE_MAX_NUM_RECIPES) /* Success if found a recipe that match the existing criteria */ goto err_unroll; + rm->tun_type = rinfo->tun_type; /* Recipe we need does not exist, add a recipe */ status = ice_add_sw_recipe(hw, rm, profiles); if (status) @@ -4958,11 +4963,13 @@ ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type, switch (tun_type) { case ICE_SW_TUN_VXLAN: - case ICE_SW_TUN_GENEVE: - if (!ice_get_open_tunnel_port(hw, &open_port)) + if (!ice_get_open_tunnel_port(hw, &open_port, TNL_VXLAN)) + return ICE_ERR_CFG; + break; + case ICE_SW_TUN_GENEVE: + if (!ice_get_open_tunnel_port(hw, &open_port, TNL_GENEVE)) return ICE_ERR_CFG; break; - default: /* Nothing needs to be done for this tunnel type */ return 0; @@ -5555,7 +5562,7 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) return status; - rid = ice_find_recp(hw, &lkup_exts); + rid = ice_find_recp(hw, &lkup_exts, rinfo->tun_type); /* If did not find a recipe that match the existing criteria */ if (rid == ICE_MAX_NUM_RECIPES) return ICE_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 384439a267ad..25cca5c4ae57 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -795,7 +795,8 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, headers->l3_mask.ttl = match.mask->ttl; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) && + fltr->tunnel_type != TNL_VXLAN && fltr->tunnel_type != TNL_GENEVE) { struct flow_match_ports match; flow_rule_match_enc_ports(rule, &match); From d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 26 Nov 2021 15:36:12 +0100 Subject: [PATCH 217/253] vrf: don't run conntrack on vrf with !dflt qdisc After the below patch, the conntrack attached to skb is set to "notrack" in the context of vrf device, for locally generated packets. But this is true only when the default qdisc is set to the vrf device. When changing the qdisc, notrack is not set anymore. In fact, there is a shortcut in the vrf driver, when the default qdisc is set, see commit dcdd43c41e60 ("net: vrf: performance improvements for IPv4") for more details. This patch ensures that the behavior is always the same, whatever the qdisc is. To demonstrate the difference, a new test is added in conntrack_vrf.sh. Fixes: 8c9c296adfae ("vrf: run conntrack only in context of lower/physdev for locally generated packets") Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Reviewed-by: David Ahern Signed-off-by: Pablo Neira Ayuso --- drivers/net/vrf.c | 8 ++--- .../selftests/netfilter/conntrack_vrf.sh | 30 ++++++++++++++++--- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ccf677015d5b..38c2f0dbe795 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -768,8 +768,6 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; - vrf_nf_set_untracked(skb); - err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); @@ -790,6 +788,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); @@ -998,8 +998,6 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; - vrf_nf_set_untracked(skb); - err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip_out_direct_finish); @@ -1021,6 +1019,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh index 91f3ef0f1192..8b5ea9234588 100755 --- a/tools/testing/selftests/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -150,11 +150,27 @@ EOF # oifname is the vrf device. test_masquerade_vrf() { + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc add dev tvrf root $qdisc + fi + ip netns exec $ns0 conntrack -F 2>/dev/null ip netns exec $ns0 nft -f - < Date: Sat, 27 Nov 2021 11:33:37 +0100 Subject: [PATCH 218/253] nft_set_pipapo: Fix bucket load in AVX2 lookup routine for six 8-bit groups The sixth byte of packet data has to be looked up in the sixth group, not in the seventh one, even if we load the bucket data into ymm6 (and not ymm5, for convenience of tracking stalls). Without this fix, matching on a MAC address as first field of a set, if 8-bit groups are selected (due to a small set size) would fail, that is, the given MAC address would never match. Reported-by: Nikita Yushchenko Cc: # 5.6.x Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation") Signed-off-by: Stefano Brivio Tested-By: Nikita Yushchenko Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo_avx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index e517663e0cd1..6f4116e72958 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize); NFT_PIPAPO_AVX2_AND(5, 0, 1); - NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize); NFT_PIPAPO_AVX2_AND(7, 2, 3); /* Stall */ From 0de53b0ffb5b22b52c1e0bd4d9e18cbbce5801d0 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sat, 27 Nov 2021 11:33:38 +0100 Subject: [PATCH 219/253] selftests: netfilter: Add correctness test for mac,net set type The existing net,mac test didn't cover the issue recently reported by Nikita Yushchenko, where MAC addresses wouldn't match if given as first field of a concatenated set with AVX2 and 8-bit groups, because there's a different code path covering the lookup of six 8-bit groups (MAC addresses) if that's the first field. Add a similar mac,net test, with MAC address and IPv4 address swapped in the set specification. Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_concat_range.sh | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index 5a4938d6dcf2..ed61f6cab60f 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout" # Set types, defined by TYPE_ variables below TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto - net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port - net_port_mac_proto_net" + net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp + net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below BUGS="flush_remove_add" @@ -277,6 +277,23 @@ perf_entries 1000 perf_proto ipv4 " +TYPE_mac_net=" +display mac,net +type_spec ether_addr . ipv4_addr +chain_spec ether saddr . ip saddr +dst +src mac addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + TYPE_net_mac_icmp=" display net,mac - ICMP type_spec ipv4_addr . ether_addr @@ -984,7 +1001,8 @@ format() { fi done for f in ${src}; do - __expr="${__expr} . " + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + __start="$(eval format_"${f}" "${srcstart}")" __end="$(eval format_"${f}" "${srcend}")" From 962e5a40358787105f126ab1dc01604da3d169e9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Nov 2021 11:34:04 +0100 Subject: [PATCH 220/253] netfilter: nft_exthdr: break evaluation if setting TCP option fails Break rule evaluation on malformed TCP options. Fixes: 99d1712bc41c ("netfilter: exthdr: tcp option set support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index af4ee874a067..dbe1f2e7dd9e 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -236,7 +236,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) - return; + goto err; opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { @@ -251,16 +251,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, continue; if (i + optl > tcphdr_len || priv->len + priv->offset > optl) - return; + goto err; if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + i + priv->len)) - return; + goto err; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) - return; + goto err; offset = i + priv->offset; @@ -303,6 +303,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, return; } + return; +err: + regs->verdict.code = NFT_BREAK; } static void nft_exthdr_sctp_eval(const struct nft_expr *expr, From d46cea0e6933da93c5373a46e3dc7e5d0e56bedb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Dec 2021 15:33:23 +0100 Subject: [PATCH 221/253] selftests: netfilter: switch zone stress to socat centos9 has nmap-ncat which doesn't like the '-q' option, use socat. While at it, mark test skipped if needed tools are missing. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_zones_many.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh index ac646376eb01..04633119b29a 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -18,11 +18,17 @@ cleanup() ip netns del $ns } -ip netns add $ns -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $gw" - exit $ksft_skip -fi +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "socat -V" "run test without socat tool" +checktool "ip netns add $ns" "create net namespace" trap cleanup EXIT @@ -71,7 +77,8 @@ EOF local start=$(date +%s%3N) i=$((i + 10000)) j=$((j + 1)) - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null + # nft rule in output places each packet in a different zone. + dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break From 802a7dc5cf1bef06f7b290ce76d478138408d6b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Dec 2021 10:03:23 -0800 Subject: [PATCH 222/253] netfilter: conntrack: annotate data-races around ct->timeout (struct nf_conn)->timeout can be read/written locklessly, add READ_ONCE()/WRITE_ONCE() to prevent load/store tearing. BUG: KCSAN: data-race in __nf_conntrack_alloc / __nf_conntrack_find_get write to 0xffff888132e78c08 of 4 bytes by task 6029 on cpu 0: __nf_conntrack_alloc+0x158/0x280 net/netfilter/nf_conntrack_core.c:1563 init_conntrack+0x1da/0xb30 net/netfilter/nf_conntrack_core.c:1635 resolve_normal_ct+0x502/0x610 net/netfilter/nf_conntrack_core.c:1746 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 nf_hook include/linux/netfilter.h:262 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline] tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680 __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864 tcp_push_pending_frames include/net/tcp.h:1897 [inline] tcp_data_snd_check+0x62/0x2e0 net/ipv4/tcp_input.c:5452 tcp_rcv_established+0x880/0x10e0 net/ipv4/tcp_input.c:5947 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0xf2/0x270 net/core/sock.c:2768 release_sock+0x40/0x110 net/core/sock.c:3300 sk_stream_wait_memory+0x435/0x700 net/core/stream.c:145 tcp_sendmsg_locked+0xb85/0x25a0 net/ipv4/tcp.c:1402 tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1440 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:644 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] __sys_sendto+0x21e/0x2c0 net/socket.c:2036 __do_sys_sendto net/socket.c:2048 [inline] __se_sys_sendto net/socket.c:2044 [inline] __x64_sys_sendto+0x74/0x90 net/socket.c:2044 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888132e78c08 of 4 bytes by task 17446 on cpu 1: nf_ct_is_expired include/net/netfilter/nf_conntrack.h:286 [inline] ____nf_conntrack_find net/netfilter/nf_conntrack_core.c:776 [inline] __nf_conntrack_find_get+0x1c7/0xac0 net/netfilter/nf_conntrack_core.c:807 resolve_normal_ct+0x273/0x610 net/netfilter/nf_conntrack_core.c:1734 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 nf_hook include/linux/netfilter.h:262 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 __tcp_send_ack+0x1fd/0x300 net/ipv4/tcp_output.c:3956 tcp_send_ack+0x23/0x30 net/ipv4/tcp_output.c:3962 __tcp_ack_snd_check+0x2d8/0x510 net/ipv4/tcp_input.c:5478 tcp_ack_snd_check net/ipv4/tcp_input.c:5523 [inline] tcp_rcv_established+0x8c2/0x10e0 net/ipv4/tcp_input.c:5948 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0xf2/0x270 net/core/sock.c:2768 release_sock+0x40/0x110 net/core/sock.c:3300 tcp_sendpage+0x94/0xb0 net/ipv4/tcp.c:1114 inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833 rds_tcp_xmit+0x376/0x5f0 net/rds/tcp_send.c:118 rds_send_xmit+0xbed/0x1500 net/rds/send.c:367 rds_send_worker+0x43/0x200 net/rds/threads.c:200 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 worker_thread+0x616/0xa70 kernel/workqueue.c:2445 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00027cc2 -> 0x00000000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 17446 Comm: kworker/u4:5 Tainted: G W 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: krdsd rds_send_worker Note: I chose an arbitrary commit for the Fixes: tag, because I do not think we need to backport this fix to very old kernels. Fixes: e37542ba111f ("netfilter: conntrack: avoid possible false sharing") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 +++--- net/netfilter/nf_conntrack_core.c | 6 +++--- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_flow_table_core.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cc663c68ddc4..d24b0a34c8f0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; return timeout > 0 ? timeout : 0; } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { - return (__s32)(ct->timeout - nfct_time_stamp) <= 0; + return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ @@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) - ct->timeout = nfct_time_stamp + NF_CT_DAY; + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 770a63103c7a..4712a90a1820 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -684,7 +684,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) tstamp = nf_conn_tstamp_find(ct); if (tstamp) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; tstamp->stop = ktime_get_real_ns(); if (timeout < 0) @@ -1036,7 +1036,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) } /* We want the clashing entry to go away real soon: 1 second timeout. */ - loser_ct->timeout = nfct_time_stamp + HZ; + WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ); /* IPS_NAT_CLASH removes the entry automatically on the first * reply. Also prevents UDP tracker from moving the entry to @@ -1560,7 +1560,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - ct->timeout = 0; + WRITE_ONCE(ct->timeout, 0); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset, 0, offsetof(struct nf_conn, proto) - diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c7708bde057c..81d03acf68d4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1998,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, if (timeout > INT_MAX) timeout = INT_MAX; - ct->timeout = nfct_time_stamp + (u32)timeout; + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 87a7388b6c89..ed37bb9b4e58 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -201,8 +201,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (timeout < 0) timeout = 0; - if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) - ct->timeout = nfct_time_stamp + timeout; + if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) + WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } static void flow_offload_fixup_ct_state(struct nf_conn *ct) From d76c51f976ed1095dcd8c5c85ec9d8fed77a3e05 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 7 Dec 2021 00:39:31 +0300 Subject: [PATCH 223/253] selftests: tls: add missing AES-CCM cipher tests Add tests for TLSv1.2 and TLSv1.3 with AES-CCM cipher. Signed-off-by: Vadim Fedorenko Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 8a22db0cca49..fb1bb402ee10 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -31,6 +31,7 @@ struct tls_crypto_info_keys { struct tls12_crypto_info_chacha20_poly1305 chacha20; struct tls12_crypto_info_sm4_gcm sm4gcm; struct tls12_crypto_info_sm4_ccm sm4ccm; + struct tls12_crypto_info_aes_ccm_128 aesccm128; }; size_t len; }; @@ -61,6 +62,11 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->sm4ccm.info.version = tls_version; tls12->sm4ccm.info.cipher_type = cipher_type; break; + case TLS_CIPHER_AES_CCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aes_ccm_128); + tls12->aesccm128.info.version = tls_version; + tls12->aesccm128.info.cipher_type = cipher_type; + break; default: break; } @@ -261,6 +267,18 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) .cipher_type = TLS_CIPHER_SM4_CCM, }; +FIXTURE_VARIANT_ADD(tls, 12_aes_ccm) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; + +FIXTURE_VARIANT_ADD(tls, 13_aes_ccm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; From 13bf99ab2130783e2b1988ef415585e3af7df97b Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 7 Dec 2021 00:39:32 +0300 Subject: [PATCH 224/253] selftests: tls: add missing AES256-GCM cipher Add tests for TLSv1.2 and TLSv1.3 with AES256-GCM cipher Signed-off-by: Vadim Fedorenko Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index fb1bb402ee10..6e468e0f42f7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -32,6 +32,7 @@ struct tls_crypto_info_keys { struct tls12_crypto_info_sm4_gcm sm4gcm; struct tls12_crypto_info_sm4_ccm sm4ccm; struct tls12_crypto_info_aes_ccm_128 aesccm128; + struct tls12_crypto_info_aes_gcm_256 aesgcm256; }; size_t len; }; @@ -67,6 +68,11 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->aesccm128.info.version = tls_version; tls12->aesccm128.info.cipher_type = cipher_type; break; + case TLS_CIPHER_AES_GCM_256: + tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_256); + tls12->aesgcm256.info.version = tls_version; + tls12->aesgcm256.info.cipher_type = cipher_type; + break; default: break; } @@ -279,6 +285,18 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_ccm) .cipher_type = TLS_CIPHER_AES_CCM_128, }; +FIXTURE_VARIANT_ADD(tls, 12_aes_gcm_256) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; + +FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; From 6ebe4b350833a535d1df02591911b5f5fba3b275 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 6 Dec 2021 17:17:23 +0100 Subject: [PATCH 225/253] MAINTAINERS: net: mlxsw: Remove Jiri as a maintainer, add myself Jiri has moved on and will not carry out the mlxsw maintainership duty any longer. Add myself as a co-maintainer instead. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/45b54312cdebaf65c5d110b15a5dd2df795bf2be.1638807297.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index faa9c34d837d..7e51081b6708 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12180,8 +12180,8 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h MELLANOX ETHERNET SWITCH DRIVERS -M: Jiri Pirko M: Ido Schimmel +M: Petr Machata L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com From e6f60c51f0435862020bcd2d1e3257caaafe5650 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Sun, 5 Dec 2021 23:38:10 +0500 Subject: [PATCH 226/253] gve: fix for null pointer dereference. Avoid passing NULL skb to __skb_put() function call if napi_alloc_skb() returns NULL. Fixes: 37149e9374bf ("gve: Implement packet continuation for RX.") Signed-off-by: Ameer Hamza Link: https://lore.kernel.org/r/20211205183810.8299-1-amhamza.mgc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_utils.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 88ca49cbc1e2..d57508bc4307 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -68,6 +68,9 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, set_protocol = ctx->curr_frag_cnt == ctx->expected_frag_cnt - 1; } else { skb = napi_alloc_skb(napi, len); + + if (unlikely(!skb)) + return NULL; set_protocol = true; } __skb_put(skb, len); From a97770cc4016c2733bcef9dbe3d5b1ad02d13356 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 6 Dec 2021 16:12:27 +0800 Subject: [PATCH 227/253] net: phy: Remove unnecessary indentation in the comments of phy_device Fix warning as: linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:543: WARNING: Unexpected indentation. linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:544: WARNING: Block quote ends without a blank line; unexpected unindent. linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:546: WARNING: Unexpected indentation. Suggested-by: Akira Yokosawa Signed-off-by: Yanteng Si Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index 96e43fbb2dd8..cbf03a5f9cf5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -538,11 +538,12 @@ struct macsec_ops; * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. - * Bits [15:0] are free to use by the PHY driver to communicate - * driver specific behavior. - * Bits [23:16] are currently reserved for future use. - * Bits [31:24] are reserved for defining generic - * PHY driver behavior. + * + * - Bits [15:0] are free to use by the PHY driver to communicate + * driver specific behavior. + * - Bits [23:16] are currently reserved for future use. + * - Bits [31:24] are reserved for defining generic + * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY From c35e8de704560846dab964a2df2e548818a424d3 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 6 Dec 2021 16:12:28 +0800 Subject: [PATCH 228/253] net: phy: Add the missing blank line in the phylink_suspend comment Fix warning as: Documentation/networking/kapi:147: ./drivers/net/phy/phylink.c:1657: WARNING: Unexpected indentation. Documentation/networking/kapi:147: ./drivers/net/phy/phylink.c:1658: WARNING: Block quote ends without a blank line; unexpected unindent. Signed-off-by: Yanteng Si Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 5904546acae6..ea82ea5660e7 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1388,6 +1388,7 @@ EXPORT_SYMBOL_GPL(phylink_stop); * @mac_wol: true if the MAC needs to receive packets for Wake-on-Lan * * Handle a network device suspend event. There are several cases: + * * - If Wake-on-Lan is not active, we can bring down the link between * the MAC and PHY by calling phylink_stop(). * - If Wake-on-Lan is active, and being handled only by the PHY, we From b5bd95d17102b6719e3531d627875b9690371383 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 6 Dec 2021 21:54:57 +0800 Subject: [PATCH 229/253] net: fec: only clear interrupt of handling queue in fec_enet_rx_queue() Background: We have a customer is running a Profinet stack on the 8MM which receives and responds PNIO packets every 4ms and PNIO-CM packets every 40ms. However, from time to time the received PNIO-CM package is "stock" and is only handled when receiving a new PNIO-CM or DCERPC-Ping packet (tcpdump shows the PNIO-CM and the DCERPC-Ping packet at the same time but the PNIO-CM HW timestamp is from the expected 40 ms and not the 2s delay of the DCERPC-Ping). After debugging, we noticed PNIO, PNIO-CM and DCERPC-Ping packets would be handled by different RX queues. The root cause should be driver ack all queues' interrupt when handle a specific queue in fec_enet_rx_queue(). The blamed patch is introduced to receive as much packets as possible once to avoid interrupt flooding. But it's unreasonable to clear other queues'interrupt when handling one queue, this patch tries to fix it. Fixes: ed63f1dcd578 (net: fec: clear receive interrupts before processing a packet) Cc: Russell King Reported-by: Nicolas Diaz Signed-off-by: Joakim Zhang Link: https://lore.kernel.org/r/20211206135457.15946-1-qiangqing.zhang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec.h | 3 +++ drivers/net/ethernet/freescale/fec_main.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 7b4961daa254..ed7301b69169 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -377,6 +377,9 @@ struct bufdesc_ex { #define FEC_ENET_WAKEUP ((uint)0x00020000) /* Wakeup request */ #define FEC_ENET_TXF (FEC_ENET_TXF_0 | FEC_ENET_TXF_1 | FEC_ENET_TXF_2) #define FEC_ENET_RXF (FEC_ENET_RXF_0 | FEC_ENET_RXF_1 | FEC_ENET_RXF_2) +#define FEC_ENET_RXF_GET(X) (((X) == 0) ? FEC_ENET_RXF_0 : \ + (((X) == 1) ? FEC_ENET_RXF_1 : \ + FEC_ENET_RXF_2)) #define FEC_ENET_TS_AVAIL ((uint)0x00010000) #define FEC_ENET_TS_TIMER ((uint)0x00008000) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index bc418b910999..1b1f7f2a6130 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1480,7 +1480,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) break; pkt_received++; - writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); + writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT); /* Check for errors. */ status ^= BD_ENET_RX_LAST; From b560b21f71eb4ef9dfc7c8ec1d0e4d7f9aa54b51 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 7 Dec 2021 10:15:21 +0200 Subject: [PATCH 230/253] bpf: Add selftests to cover packet access corner cases This commit adds BPF verifier selftests that cover all corner cases by packet boundary checks. Specifically, 8-byte packet reads are tested at the beginning of data and at the beginning of data_meta, using all kinds of boundary checks (all comparison operators: <, >, <=, >=; both permutations of operands: data + length compared to end, end compared to data + length). For each case there are three tests: 1. Length is just enough for an 8-byte read. Length is either 7 or 8, depending on the comparison. 2. Length is increased by 1 - should still pass the verifier. These cases are useful, because they failed before commit 2fa7d94afc1a ("bpf: Fix the off-by-two error in range markings"). 3. Length is decreased by 1 - should be rejected by the verifier. Some existing tests are just renamed to avoid duplication. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211207081521.41923-1-maximmi@nvidia.com --- .../bpf/verifier/xdp_direct_packet_access.c | 600 +++++++++++++++++- 1 file changed, 584 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c index de172a5b8754..b4ec228eb95d 100644 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -35,7 +35,7 @@ .prog_type = BPF_PROG_TYPE_XDP, }, { - "XDP pkt read, pkt_data' > pkt_end, good access", + "XDP pkt read, pkt_data' > pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -87,6 +87,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end > pkt_data', good access", .insns = { @@ -106,7 +141,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end > pkt_data', bad access 1", + "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -142,6 +177,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end > pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' < pkt_end, good access", .insns = { @@ -161,7 +232,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' < pkt_end, bad access 1", + "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -198,7 +269,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end < pkt_data', good access", + "XDP pkt read, pkt_data' < pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -250,6 +357,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end < pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' >= pkt_end, good access", .insns = { @@ -268,7 +410,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -304,7 +446,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end >= pkt_data', good access", + "XDP pkt read, pkt_data' >= pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -359,7 +535,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' <= pkt_end, good access", + "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -413,6 +626,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end <= pkt_data', good access", .insns = { @@ -431,7 +681,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end <= pkt_data', bad access 1", + "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -467,7 +717,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' > pkt_data, good access", + "XDP pkt read, pkt_end <= pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -519,6 +803,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data > pkt_meta', good access", .insns = { @@ -538,7 +857,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data > pkt_meta', bad access 1", + "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -574,6 +893,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' < pkt_data, good access", .insns = { @@ -593,7 +948,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' < pkt_data, bad access 1", + "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -630,7 +985,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data < pkt_meta', good access", + "XDP pkt read, pkt_meta' < pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -682,6 +1073,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' >= pkt_data, good access", .insns = { @@ -700,7 +1126,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", + "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -736,7 +1162,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data >= pkt_meta', good access", + "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -791,7 +1251,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' <= pkt_data, good access", + "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -845,6 +1342,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data <= pkt_meta', good access", .insns = { @@ -863,7 +1397,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data <= pkt_meta', bad access 1", + "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -898,3 +1432,37 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, From 14902f8961dca9c66bf190f7b1583767c97a4197 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 7 Dec 2021 13:10:53 +0100 Subject: [PATCH 231/253] HID: Ignore battery for Elan touchscreen on Asus UX550VE Battery status is reported for the Asus UX550VE touchscreen even though it does not have a battery. Prevent it from always reporting the battery as low. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1897823 Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ca418bffd3b2..19da07777d62 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -399,6 +399,7 @@ #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 #define I2C_DEVICE_ID_HP_ENVY_X360_15 0x2d05 #define I2C_DEVICE_ID_HP_SPECTRE_X360_15 0x2817 +#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 217f2d1b91c5..03f994541981 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -325,6 +325,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15), From 1a0f25a52e08b1f67510cabbb44888d2b3c46359 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 12 Nov 2021 17:06:02 -0800 Subject: [PATCH 232/253] ice: safer stats processing The driver was zeroing live stats that could be fetched by ndo_get_stats64 at any time. This could result in inconsistent statistics, and the telltale sign was when reading stats frequently from /proc/net/dev, the stats would go backwards. Fix by collecting stats into a local, and delaying when we write to the structure so it's not incremental. Fixes: fcea6f3da546 ("ice: Add stats and ethtool support") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 29 ++++++++++++++--------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c6d6ce52e2ca..73c61cdb036f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5930,14 +5930,15 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, struct ice_q_stats st /** * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters * @vsi: the VSI to be updated + * @vsi_stats: the stats struct to be updated * @rings: rings to work on * @count: number of rings */ static void -ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_tx_ring **rings, - u16 count) +ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, + struct rtnl_link_stats64 *vsi_stats, + struct ice_tx_ring **rings, u16 count) { - struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; u16 i; for (i = 0; i < count; i++) { @@ -5961,15 +5962,13 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_tx_ring **rings, */ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { - struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; + struct rtnl_link_stats64 *vsi_stats; u64 pkts, bytes; int i; - /* reset netdev stats */ - vsi_stats->tx_packets = 0; - vsi_stats->tx_bytes = 0; - vsi_stats->rx_packets = 0; - vsi_stats->rx_bytes = 0; + vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC); + if (!vsi_stats) + return; /* reset non-netdev (extended) stats */ vsi->tx_restart = 0; @@ -5981,7 +5980,8 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) rcu_read_lock(); /* update Tx rings counters */ - ice_update_vsi_tx_ring_stats(vsi, vsi->tx_rings, vsi->num_txq); + ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings, + vsi->num_txq); /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { @@ -5996,10 +5996,17 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update XDP Tx rings counters */ if (ice_is_xdp_ena_vsi(vsi)) - ice_update_vsi_tx_ring_stats(vsi, vsi->xdp_rings, + ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings, vsi->num_xdp_txq); rcu_read_unlock(); + + vsi->net_stats.tx_packets = vsi_stats->tx_packets; + vsi->net_stats.tx_bytes = vsi_stats->tx_bytes; + vsi->net_stats.rx_packets = vsi_stats->rx_packets; + vsi->net_stats.rx_bytes = vsi_stats->rx_bytes; + + kfree(vsi_stats); } /** From 2b29cb9e3f7f038c7f50ad2583b47caf5cb1eaf2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 7 Dec 2021 10:32:43 +0000 Subject: [PATCH 233/253] net: dsa: mv88e6xxx: fix "don't use PHY_DETECT on internal PHY's" This commit fixes a misunderstanding in commit 4a3e0aeddf09 ("net: dsa: mv88e6xxx: don't use PHY_DETECT on internal PHY's"). For Marvell DSA switches with the PHY_DETECT bit (for non-6250 family devices), controls whether the PPU polls the PHY to retrieve the link, speed, duplex and pause status to update the port configuration. This applies for both internal and external PHYs. For some switches such as 88E6352 and 88E6390X, PHY_DETECT has an additional function of enabling auto-media mode between the internal PHY and SERDES blocks depending on which first gains link. The original intention of commit 5d5b231da7ac (net: dsa: mv88e6xxx: use PHY_DETECT in mac_link_up/mac_link_down) was to allow this bit to be used to detect when this propagation is enabled, and allow software to update the port configuration. This has found to be necessary for some switches which do not automatically propagate status from the SERDES to the port, which includes the 88E6390. However, commit 4a3e0aeddf09 ("net: dsa: mv88e6xxx: don't use PHY_DETECT on internal PHY's") breaks this assumption. Maarten Zanders has confirmed that the issue he was addressing was for an 88E6250 switch, which does not have a PHY_DETECT bit in bit 12, but instead a link status bit. Therefore, mv88e6xxx_port_ppu_updates() does not report correctly. This patch resolves the above issues by reverting Maarten's change and instead making mv88e6xxx_port_ppu_updates() indicate whether the port is internal for the 88E6250 family of switches. Yes, you're right, I'm targeting the 6250 family. And yes, your suggestion would solve my case and is a better implementation for the other devices (as far as I can see). Fixes: 4a3e0aeddf09 ("net: dsa: mv88e6xxx: don't use PHY_DETECT on internal PHY's") Signed-off-by: Russell King Tested-by: Maarten Zanders Link: https://lore.kernel.org/r/E1muXm7-00EwJB-7n@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index f00cbf5753b9..9f675464efc3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -471,6 +471,12 @@ static int mv88e6xxx_port_ppu_updates(struct mv88e6xxx_chip *chip, int port) u16 reg; int err; + /* The 88e6250 family does not have the PHY detect bit. Instead, + * report whether the port is internal. + */ + if (chip->info->family == MV88E6XXX_FAMILY_6250) + return port < chip->info->num_internal_phys; + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); if (err) { dev_err(chip->dev, @@ -752,11 +758,10 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - /* Internal PHYs propagate their configuration directly to the MAC. - * External PHYs depend on whether the PPU is enabled for this port. + /* Force the link down if we know the port may not be automatically + * updated by the switch or if we are using fixed-link mode. */ - if (((!mv88e6xxx_phy_is_internal(ds, port) && - !mv88e6xxx_port_ppu_updates(chip, port)) || + if ((!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) && ops->port_sync_link) err = ops->port_sync_link(chip, port, mode, false); mv88e6xxx_reg_unlock(chip); @@ -779,11 +784,11 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - /* Internal PHYs propagate their configuration directly to the MAC. - * External PHYs depend on whether the PPU is enabled for this port. + /* Configure and force the link up if we know that the port may not + * automatically updated by the switch or if we are using fixed-link + * mode. */ - if ((!mv88e6xxx_phy_is_internal(ds, port) && - !mv88e6xxx_port_ppu_updates(chip, port)) || + if (!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) { /* FIXME: for an automedia port, should we force the link * down here - what if the link comes up due to "other" media From e195e9b5dee6459d8c8e6a314cc71a644a0537fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 08:53:29 -0800 Subject: [PATCH 234/253] net, neigh: clear whole pneigh_entry at alloc time Commit 2c611ad97a82 ("net, neigh: Extend neigh->flags to 32 bit to allow for extensions") enables a new KMSAM warning [1] I think the bug is actually older, because the following intruction only occurred if ndm->ndm_flags had NTF_PROXY set. pn->flags = ndm->ndm_flags; Let's clear all pneigh_entry fields at alloc time. [1] BUG: KMSAN: uninit-value in pneigh_fill_info+0x986/0xb30 net/core/neighbour.c:2593 pneigh_fill_info+0x986/0xb30 net/core/neighbour.c:2593 pneigh_dump_table net/core/neighbour.c:2715 [inline] neigh_dump_info+0x1e3f/0x2c60 net/core/neighbour.c:2832 netlink_dump+0xaca/0x16a0 net/netlink/af_netlink.c:2265 __netlink_dump_start+0xd1c/0xee0 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:254 [inline] rtnetlink_rcv_msg+0x181b/0x18c0 net/core/rtnetlink.c:5534 netlink_rcv_skb+0x447/0x800 net/netlink/af_netlink.c:2491 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5589 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x1095/0x1360 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x16f3/0x1870 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] sock_write_iter+0x594/0x690 net/socket.c:1057 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0x1318/0x2030 fs/read_write.c:590 ksys_write+0x28c/0x520 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0xdb/0x120 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: slab_post_alloc_hook mm/slab.h:524 [inline] slab_alloc_node mm/slub.c:3251 [inline] slab_alloc mm/slub.c:3259 [inline] __kmalloc+0xc3c/0x12d0 mm/slub.c:4437 kmalloc include/linux/slab.h:595 [inline] pneigh_lookup+0x60f/0xd70 net/core/neighbour.c:766 arp_req_set_public net/ipv4/arp.c:1016 [inline] arp_req_set+0x430/0x10a0 net/ipv4/arp.c:1032 arp_ioctl+0x8d4/0xb60 net/ipv4/arp.c:1232 inet_ioctl+0x4ef/0x820 net/ipv4/af_inet.c:947 sock_do_ioctl net/socket.c:1118 [inline] sock_ioctl+0xa3f/0x13e0 net/socket.c:1235 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl+0x2df/0x4a0 fs/ioctl.c:860 __x64_sys_ioctl+0xd8/0x110 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae CPU: 1 PID: 20001 Comm: syz-executor.0 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 62dd93181aaa ("[IPV6] NDISC: Set per-entry is_router flag in Proxy NA.") Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20211206165329.1049835-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 72ba027c34cf..dda12fbd177b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -763,11 +763,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, ASSERT_RTNL(); - n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; - n->protocol = 0; write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; From f71ef02f1a4a3c49962fa341ad8de19071f0f9bf Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Dec 2021 00:17:37 -0800 Subject: [PATCH 235/253] vmxnet3: fix minimum vectors alloc issue 'Commit 39f9895a00f4 ("vmxnet3: add support for 32 Tx/Rx queues")' added support for 32Tx/Rx queues. Within that patch, value of VMXNET3_LINUX_MIN_MSIX_VECT was updated. However, there is a case (numvcpus = 2) which actually requires 3 intrs which matches VMXNET3_LINUX_MIN_MSIX_VECT which then is treated as failure by stack to allocate more vectors. This patch fixes this issue. Fixes: 39f9895a00f4 ("vmxnet3: add support for 32 Tx/Rx queues") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Link: https://lore.kernel.org/r/20211207081737.14000-1-doshir@vmware.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 14fae317bc70..fd407c0e2856 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3261,7 +3261,7 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) #ifdef CONFIG_PCI_MSI if (adapter->intr.type == VMXNET3_IT_MSIX) { - int i, nvec; + int i, nvec, nvec_allocated; nvec = adapter->share_intr == VMXNET3_INTR_TXSHARE ? 1 : adapter->num_tx_queues; @@ -3274,14 +3274,15 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) for (i = 0; i < nvec; i++) adapter->intr.msix_entries[i].entry = i; - nvec = vmxnet3_acquire_msix_vectors(adapter, nvec); - if (nvec < 0) + nvec_allocated = vmxnet3_acquire_msix_vectors(adapter, nvec); + if (nvec_allocated < 0) goto msix_err; /* If we cannot allocate one MSIx vector per queue * then limit the number of rx queues to 1 */ - if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) { + if (nvec_allocated == VMXNET3_LINUX_MIN_MSIX_VECT && + nvec != VMXNET3_LINUX_MIN_MSIX_VECT) { if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE || adapter->num_rx_queues != 1) { adapter->share_intr = VMXNET3_INTR_TXSHARE; @@ -3291,14 +3292,14 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) } } - adapter->intr.num_intrs = nvec; + adapter->intr.num_intrs = nvec_allocated; return; msix_err: /* If we cannot allocate MSIx vectors use only one rx queue */ dev_info(&adapter->pdev->dev, "Failed to enable MSI-X, error %d. " - "Limiting #rx queues to 1, try MSI.\n", nvec); + "Limiting #rx queues to 1, try MSI.\n", nvec_allocated); adapter->intr.type = VMXNET3_IT_MSI; } From a50e659b2a1be14784e80f8492aab177e67c53a2 Mon Sep 17 00:00:00 2001 From: Louis Amas Date: Tue, 7 Dec 2021 15:34:22 +0100 Subject: [PATCH 236/253] net: mvpp2: fix XDP rx queues registering The registration of XDP queue information is incorrect because the RX queue id we use is invalid. When port->id == 0 it appears to works as expected yet it's no longer the case when port->id != 0. The problem arised while using a recent kernel version on the MACCHIATOBin. This board has several ports: * eth0 and eth1 are 10Gbps interfaces ; both ports has port->id == 0; * eth2 is a 1Gbps interface with port->id != 0. Code from xdp-tutorial (more specifically advanced03-AF_XDP) was used to test packet capture and injection on all these interfaces. The XDP kernel was simplified to: SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) { int index = ctx->rx_queue_index; /* A set entry here means that the correspnding queue_id * has an active AF_XDP socket bound to it. */ if (bpf_map_lookup_elem(&xsks_map, &index)) return bpf_redirect_map(&xsks_map, index, 0); return XDP_PASS; } Starting the program using: ./af_xdp_user -d DEV Gives the following result: * eth0 : ok * eth1 : ok * eth2 : no capture, no injection Investigating the issue shows that XDP rx queues for eth2 are wrong: XDP expects their id to be in the range [0..3] but we found them to be in the range [32..35]. Trying to force rx queue ids using: ./af_xdp_user -d eth2 -Q 32 fails as expected (we shall not have more than 4 queues). When we register the XDP rx queue information (using xdp_rxq_info_reg() in function mvpp2_rxq_init()) we tell it to use rxq->id as the queue id. This value is computed as: rxq->id = port->id * max_rxq_count + queue_id where max_rxq_count depends on the device version. In the MACCHIATOBin case, this value is 32, meaning that rx queues on eth2 are numbered from 32 to 35 - there are four of them. Clearly, this is not the per-port queue id that XDP is expecting: it wants a value in the range [0..3]. It shall directly use queue_id which is stored in rxq->logic_rxq -- so let's use that value instead. rxq->id is left untouched ; its value is indeed valid but it should not be used in this context. This is consistent with the remaining part of the code in mvpp2_rxq_init(). With this change, packet capture is working as expected on all the MACCHIATOBin ports. Fixes: b27db2274ba8 ("mvpp2: use page_pool allocator") Signed-off-by: Louis Amas Signed-off-by: Emmanuel Deloget Reviewed-by: Marcin Wojtas Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20211207143423.916334-1-louis.amas@eho.link Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 6480696c979b..6da8a595026b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2960,11 +2960,11 @@ static int mvpp2_rxq_init(struct mvpp2_port *port, mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size); if (priv->percpu_pools) { - err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->id, 0); + err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->logic_rxq, 0); if (err < 0) goto err_free_dma; - err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->id, 0); + err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->logic_rxq, 0); if (err < 0) goto err_unregister_rxq_short; From 36aea60fc892ce73f96d45dc7eb239c7c4c1fa69 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Wed, 8 Dec 2021 16:21:21 +0100 Subject: [PATCH 237/253] can: kvaser_pciefd: kvaser_pciefd_rx_error_frame(): increase correct stats->{rx,tx}_errors counter Check the direction bit in the error frame packet (EPACK) to determine which net_device_stats {rx,tx}_errors counter to increase. Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Link: https://lore.kernel.org/all/20211208152122.250852-1-extja@kvaser.com Cc: stable@vger.kernel.org Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 74d9899fc904..eb74cdf26b88 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -248,6 +248,9 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); #define KVASER_PCIEFD_SPACK_EWLR BIT(23) #define KVASER_PCIEFD_SPACK_EPLR BIT(24) +/* Kvaser KCAN_EPACK second word */ +#define KVASER_PCIEFD_EPACK_DIR_TX BIT(0) + struct kvaser_pciefd; struct kvaser_pciefd_can { @@ -1285,7 +1288,10 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can, can->err_rep_cnt++; can->can.can_stats.bus_error++; - stats->rx_errors++; + if (p->header[1] & KVASER_PCIEFD_EPACK_DIR_TX) + stats->tx_errors++; + else + stats->rx_errors++; can->bec.txerr = bec.txerr; can->bec.rxerr = bec.rxerr; From fb12797ab1fef480ad8a32a30984844444eeb00d Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Wed, 8 Dec 2021 16:21:22 +0100 Subject: [PATCH 238/253] can: kvaser_usb: get CAN clock frequency from device The CAN clock frequency is used when calculating the CAN bittiming parameters. When wrong clock frequency is used, the device may end up with wrong bittiming parameters, depending on user requested bittiming parameters. To avoid this, get the CAN clock frequency from the device. Various existing Kvaser Leaf products use different CAN clocks. Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Link: https://lore.kernel.org/all/20211208152122.250852-2-extja@kvaser.com Cc: stable@vger.kernel.org Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde --- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 101 +++++++++++++----- 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 59ba7c7beec0..f7af1bf5ab46 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -28,10 +28,6 @@ #include "kvaser_usb.h" -/* Forward declaration */ -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; - -#define CAN_USB_CLOCK 8000000 #define MAX_USBCAN_NET_DEVICES 2 /* Command header size */ @@ -80,6 +76,12 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; #define CMD_LEAF_LOG_MESSAGE 106 +/* Leaf frequency options */ +#define KVASER_USB_LEAF_SWOPTION_FREQ_MASK 0x60 +#define KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK 0 +#define KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK BIT(5) +#define KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK BIT(6) + /* error factors */ #define M16C_EF_ACKE BIT(0) #define M16C_EF_CRCE BIT(1) @@ -340,6 +342,50 @@ struct kvaser_usb_err_summary { }; }; +static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { + .name = "kvaser_usb", + .tseg1_min = KVASER_USB_TSEG1_MIN, + .tseg1_max = KVASER_USB_TSEG1_MAX, + .tseg2_min = KVASER_USB_TSEG2_MIN, + .tseg2_max = KVASER_USB_TSEG2_MAX, + .sjw_max = KVASER_USB_SJW_MAX, + .brp_min = KVASER_USB_BRP_MIN, + .brp_max = KVASER_USB_BRP_MAX, + .brp_inc = KVASER_USB_BRP_INC, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_8mhz = { + .clock = { + .freq = 8000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_16mhz = { + .clock = { + .freq = 16000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_24mhz = { + .clock = { + .freq = 24000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_32mhz = { + .clock = { + .freq = 32000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + static void * kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *frame_len, @@ -471,6 +517,27 @@ static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev, return rc; } +static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev, + const struct leaf_cmd_softinfo *softinfo) +{ + u32 sw_options = le32_to_cpu(softinfo->sw_options); + + dev->fw_version = le32_to_cpu(softinfo->fw_version); + dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx); + + switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { + case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz; + break; + } +} + static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) { struct kvaser_cmd cmd; @@ -486,14 +553,13 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) switch (dev->card_data.leaf.family) { case KVASER_LEAF: - dev->fw_version = le32_to_cpu(cmd.u.leaf.softinfo.fw_version); - dev->max_tx_urbs = - le16_to_cpu(cmd.u.leaf.softinfo.max_outstanding_tx); + kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo); break; case KVASER_USBCAN: dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version); dev->max_tx_urbs = le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx); + dev->cfg = &kvaser_usb_leaf_dev_cfg_8mhz; break; } @@ -1225,24 +1291,11 @@ static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev) { struct kvaser_usb_dev_card_data *card_data = &dev->card_data; - dev->cfg = &kvaser_usb_leaf_dev_cfg; card_data->ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; return 0; } -static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { - .name = "kvaser_usb", - .tseg1_min = KVASER_USB_TSEG1_MIN, - .tseg1_max = KVASER_USB_TSEG1_MAX, - .tseg2_min = KVASER_USB_TSEG2_MIN, - .tseg2_max = KVASER_USB_TSEG2_MAX, - .sjw_max = KVASER_USB_SJW_MAX, - .brp_min = KVASER_USB_BRP_MIN, - .brp_max = KVASER_USB_BRP_MAX, - .brp_inc = KVASER_USB_BRP_INC, -}; - static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); @@ -1348,11 +1401,3 @@ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { .dev_read_bulk_callback = kvaser_usb_leaf_read_bulk_callback, .dev_frame_to_cmd = kvaser_usb_leaf_frame_to_cmd, }; - -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg = { - .clock = { - .freq = CAN_USB_CLOCK, - }, - .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, -}; From 0416e7af2369b0d12a28dea8d30b104df9a6953d Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 9 Dec 2021 09:15:52 +0500 Subject: [PATCH 239/253] net: dsa: mv88e6xxx: error handling for serdes_power functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added default case to handle undefined cmode scenario in mv88e6393x_serdes_power() and mv88e6393x_serdes_power() methods. Addresses-Coverity: 1494644 ("Uninitialized scalar variable") Fixes: 21635d9203e1c (net: dsa: mv88e6xxx: Fix application of erratum 4.8 for 88E6393X) Reviewed-by: Marek Behún Signed-off-by: Ameer Hamza Link: https://lore.kernel.org/r/20211209041552.9810-1-amhamza.mgc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/serdes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 55273013bfb5..2b05ead515cd 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -830,7 +830,7 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool up) { u8 cmode = chip->ports[port].cmode; - int err = 0; + int err; switch (cmode) { case MV88E6XXX_PORT_STS_CMODE_SGMII: @@ -842,6 +842,9 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, case MV88E6XXX_PORT_STS_CMODE_RXAUI: err = mv88e6390_serdes_power_10g(chip, lane, up); break; + default: + err = -EINVAL; + break; } if (!err && up) @@ -1541,6 +1544,9 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, case MV88E6393X_PORT_STS_CMODE_10GBASER: err = mv88e6390_serdes_power_10g(chip, lane, on); break; + default: + err = -EINVAL; + break; } if (err) From 158390e45612ef0fde160af0826f1740c36daf21 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 8 Dec 2021 18:03:33 +0800 Subject: [PATCH 240/253] udp: using datalen to cap max gso segments The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, this is checked in udp_send_skb(): if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } skb->len contains network and transport header len here, we should use only data len instead. Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT") Signed-off-by: Jianguo Wu Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/900742e5-81fb-30dc-6e0b-375c6cdd7982@163.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8bcecdd6aeda..23b05e28490b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -916,7 +916,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } From fd79a0cbf0b2e34bcc45b13acf962e2032a82203 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 8 Dec 2021 10:27:42 -0800 Subject: [PATCH 241/253] nfc: fix segfault in nfc_genl_dump_devices_done When kmalloc in nfc_genl_dump_devices() fails then nfc_genl_dump_devices_done() segfaults as below KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 0 PID: 25 Comm: kworker/0:1 Not tainted 5.16.0-rc4-01180-g2a987e65025e-dirty #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-6.fc35 04/01/2014 Workqueue: events netlink_sock_destruct_work RIP: 0010:klist_iter_exit+0x26/0x80 Call Trace: class_dev_iter_exit+0x15/0x20 nfc_genl_dump_devices_done+0x3b/0x50 genl_lock_done+0x84/0xd0 netlink_sock_destruct+0x8f/0x270 __sk_destruct+0x64/0x3b0 sk_destruct+0xa8/0xd0 __sk_free+0x2e8/0x3d0 sk_free+0x51/0x90 netlink_sock_destruct_work+0x1c/0x20 process_one_work+0x411/0x710 worker_thread+0x6fd/0xa80 Link: https://syzkaller.appspot.com/bug?id=fc0fa5a53db9edd261d56e74325419faf18bd0df Reported-by: syzbot+f9f76f4a0766420b4a02@syzkaller.appspotmail.com Signed-off-by: Tadeusz Struk Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211208182742.340542-1-tadeusz.struk@linaro.org Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 334f63c9529e..0b4fae183a4b 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -636,8 +636,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } From 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 9 Dec 2021 09:13:07 +0100 Subject: [PATCH 242/253] nfc: fix potential NULL pointer deref in nfc_genl_dump_ses_done The done() netlink callback nfc_genl_dump_ses_done() should check if received argument is non-NULL, because its allocation could fail earlier in dumpit() (nfc_genl_dump_ses()). Fixes: ac22ac466a65 ("NFC: Add a GET_SE netlink API") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211209081307.57337-1-krzysztof.kozlowski@canonical.com Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 0b4fae183a4b..f184b0db79d4 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1394,8 +1394,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } From c56c96303e9289cc34716b1179597b6f470833de Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Thu, 9 Dec 2021 14:15:11 +0800 Subject: [PATCH 243/253] nfp: Fix memory leak in nfp_cpp_area_cache_add() In line 800 (#1), nfp_cpp_area_alloc() allocates and initializes a CPP area structure. But in line 807 (#2), when the cache is allocated failed, this CPP area structure is not freed, which will result in memory leak. We can fix it by freeing the CPP area when the cache is allocated failed (#2). 792 int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) 793 { 794 struct nfp_cpp_area_cache *cache; 795 struct nfp_cpp_area *area; 800 area = nfp_cpp_area_alloc(cpp, NFP_CPP_ID(7, NFP_CPP_ACTION_RW, 0), 801 0, size); // #1: allocates and initializes 802 if (!area) 803 return -ENOMEM; 805 cache = kzalloc(sizeof(*cache), GFP_KERNEL); 806 if (!cache) 807 return -ENOMEM; // #2: missing free 817 return 0; 818 } Fixes: 4cb584e0ee7d ("nfp: add CPP access core") Signed-off-by: Jianglei Nie Acked-by: Simon Horman Link: https://lore.kernel.org/r/20211209061511.122535-1-niejianglei2021@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index d7ac0307797f..34c0d2ddf9ef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) return -ENOMEM; cache = kzalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) + if (!cache) { + nfp_cpp_area_free(area); return -ENOMEM; + } cache->id = 0; cache->addr = 0; From ae68d93354e5bf5191ee673982251864ea24dd5c Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 8 Dec 2021 20:54:09 +0100 Subject: [PATCH 244/253] seg6: fix the iif in the IPv6 socket control block When an IPv4 packet is received, the ip_rcv_core(...) sets the receiving interface index into the IPv4 socket control block (v5.16-rc4, net/ipv4/ip_input.c line 510): IPCB(skb)->iif = skb->skb_iif; If that IPv4 packet is meant to be encapsulated in an outer IPv6+SRH header, the seg6_do_srh_encap(...) performs the required encapsulation. In this case, the seg6_do_srh_encap function clears the IPv6 socket control block (v5.16-rc4 net/ipv6/seg6_iptunnel.c line 163): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); The memset(...) was introduced in commit ef489749aae5 ("ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation") a long time ago (2019-01-29). Since the IPv6 socket control block and the IPv4 socket control block share the same memory area (skb->cb), the receiving interface index info is lost (IP6CB(skb)->iif is set to zero). As a side effect, that condition triggers a NULL pointer dereference if commit 0857d6f8c759 ("ipv6: When forwarding count rx stats on the orig netdev") is applied. To fix that issue, we set the IP6CB(skb)->iif with the index of the receiving interface once again. Fixes: ef489749aae5 ("ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation") Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20211208195409.12169-1-andrea.mayer@uniroma2.it Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_iptunnel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 3adc5d9211ad..d64855010948 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -161,6 +161,14 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + + /* the control block has been erased, so we have to set the + * iif once again. + * We read the receiving interface index directly from the + * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: + * ip_rcv_core(...)). + */ + IP6CB(skb)->iif = skb->skb_iif; } hdr->nexthdr = NEXTHDR_ROUTING; From 9acfc57fa2b8944ed079cedbf846823ea32b8a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Wed, 8 Dec 2021 23:37:23 +0100 Subject: [PATCH 245/253] net: mana: Fix memory leak in mana_hwc_create_wq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If allocating the DMA buffer fails, mana_hwc_destroy_wq was called without previously storing the pointer to the queue. In order to avoid leaking the pointer to the queue, store it as soon as it is allocated. Addresses-Coverity-ID: 1484720 ("Resource leak") Signed-off-by: José Expósito Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/20211208223723.18520-1-jose.exposito89@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 34b971ff8ef8..078d6a5a0768 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -480,16 +480,16 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, if (err) goto out; - err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, - &hwc_wq->msg_buf); - if (err) - goto out; - hwc_wq->hwc = hwc; hwc_wq->gdma_wq = queue; hwc_wq->queue_depth = q_depth; hwc_wq->hwc_cq = hwc_cq; + err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, + &hwc_wq->msg_buf); + if (err) + goto out; + *hwc_wq_ptr = hwc_wq; return 0; out: From 61c2402665f1e10c5742033fce18392e369931d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 00:49:37 -0800 Subject: [PATCH 246/253] net/sched: fq_pie: prevent dismantle issue For some reason, fq_pie_destroy() did not copy working code from pie_destroy() and other qdiscs, thus causing elusive bug. Before calling del_timer_sync(&q->adapt_timer), we need to ensure timer will not rearm itself. rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-....: (4416 ticks this GP) idle=60d/1/0x4000000000000000 softirq=10433/10434 fqs=2579 (t=10501 jiffies g=13085 q=3989) NMI backtrace for cpu 0 CPU: 0 PID: 13 Comm: ksoftirqd/0 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111 nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343 print_cpu_stall kernel/rcu/tree_stall.h:627 [inline] check_cpu_stall kernel/rcu/tree_stall.h:711 [inline] rcu_pending kernel/rcu/tree.c:3878 [inline] rcu_sched_clock_irq.cold+0x9d/0x746 kernel/rcu/tree.c:2597 update_process_times+0x16d/0x200 kernel/time/timer.c:1785 tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226 tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428 __run_hrtimer kernel/time/hrtimer.c:1685 [inline] __hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749 hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline] __sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103 sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 RIP: 0010:write_comp_data kernel/kcov.c:221 [inline] RIP: 0010:__sanitizer_cov_trace_const_cmp1+0x1d/0x80 kernel/kcov.c:273 Code: 54 c8 20 48 89 10 c3 66 0f 1f 44 00 00 53 41 89 fb 41 89 f1 bf 03 00 00 00 65 48 8b 0c 25 40 70 02 00 48 89 ce 4c 8b 54 24 08 4e f7 ff ff 84 c0 74 51 48 8b 81 88 15 00 00 44 8b 81 84 15 00 RSP: 0018:ffffc90000d27b28 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff888064bf1bf0 RCX: ffff888011928000 RDX: ffff888011928000 RSI: ffff888011928000 RDI: 0000000000000003 RBP: ffff888064bf1c28 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff875d8295 R11: 0000000000000000 R12: 0000000000000000 R13: ffff8880783dd300 R14: 0000000000000000 R15: 0000000000000000 pie_calculate_probability+0x405/0x7c0 net/sched/sch_pie.c:418 fq_pie_timer+0x170/0x2a0 net/sched/sch_fq_pie.c:383 call_timer_fn+0x1a5/0x6b0 kernel/time/timer.c:1421 expire_timers kernel/time/timer.c:1466 [inline] __run_timers.part.0+0x675/0xa20 kernel/time/timer.c:1734 __run_timers kernel/time/timer.c:1715 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1747 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 run_ksoftirqd kernel/softirq.c:921 [inline] run_ksoftirqd+0x2d/0x60 kernel/softirq.c:913 smpboot_thread_fn+0x645/0x9c0 kernel/smpboot.c:164 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Mohit P. Tahiliani Cc: Sachin D. Patil Cc: V. Saicharan Cc: Mohit Bhasi Cc: Leslie Monis Cc: Gautam Ramakrishnan Link: https://lore.kernel.org/r/20211209084937.3500020-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq_pie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 830f3559f727..d6aba6edd16e 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -531,6 +531,7 @@ static void fq_pie_destroy(struct Qdisc *sch) struct fq_pie_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); + q->p_params.tupdate = 0; del_timer_sync(&q->adapt_timer); kvfree(q->flows); } From 37ad4e2a77180841dffb1df64cdbd95541512d3d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 9 Dec 2021 16:35:46 +0100 Subject: [PATCH 247/253] MAINTAINERS: s390/net: remove myself as maintainer I won't have access to the relevant HW and docs much longer. Signed-off-by: Julian Wiedmann Link: https://lore.kernel.org/r/20211209153546.1152921-1-jwi@linux.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7e51081b6708..6dd20c31d875 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16623,7 +16623,6 @@ W: http://www.ibm.com/developerworks/linux/linux390/ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER -M: Julian Wiedmann M: Alexandra Winter M: Wenjia Zhang L: linux-s390@vger.kernel.org @@ -16635,7 +16634,6 @@ F: include/net/iucv/ F: net/iucv/ S390 NETWORK DRIVERS -M: Julian Wiedmann M: Alexandra Winter M: Wenjia Zhang L: linux-s390@vger.kernel.org From e8b1d7698038e76363859fb47ae0a262080646f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 9 Dec 2021 12:05:40 +0100 Subject: [PATCH 248/253] net: dsa: felix: Fix memory leak in felix_setup_mmio_filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid a memory leak if there is not a CPU port defined. Fixes: 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during init and teardown") Addresses-Coverity-ID: 1492897 ("Resource leak") Addresses-Coverity-ID: 1492899 ("Resource leak") Signed-off-by: José Expósito Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211209110538.11585-1-jose.exposito89@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 327cc4654806..f1a05e7dc818 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -290,8 +290,11 @@ static int felix_setup_mmio_filtering(struct felix *felix) } } - if (cpu < 0) + if (cpu < 0) { + kfree(tagging_rule); + kfree(redirect_rule); return -EINVAL; + } tagging_rule->key_type = OCELOT_VCAP_KEY_ETYPE; *(__be16 *)tagging_rule->key.etype.etype.value = htons(ETH_P_1588); From 373f121a3c3a741f90b2a81f120f37c539fa0c86 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 9 Dec 2021 15:46:27 +0530 Subject: [PATCH 249/253] net: wwan: iosm: fixes unnecessary doorbell send In TX packet accumulation flow transport layer is giving a doorbell to device even though there is no pending control TX transfer that needs immediate attention. Introduced a new hpda_ctrl_pending variable to keep track of pending control TX transfer. If there is a pending control TX transfer which needs an immediate attention only then give a doorbell to device. Signed-off-by: M Chetan Kumar Reviewed-by: Sergey Ryazanov Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index cff3b43ca4d7..b4d47b31ba91 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -181,9 +181,9 @@ void ipc_imem_hrtimer_stop(struct hrtimer *hr_timer) bool ipc_imem_ul_write_td(struct iosm_imem *ipc_imem) { struct ipc_mem_channel *channel; + bool hpda_ctrl_pending = false; struct sk_buff_head *ul_list; bool hpda_pending = false; - bool forced_hpdu = false; struct ipc_pipe *pipe; int i; @@ -200,15 +200,19 @@ bool ipc_imem_ul_write_td(struct iosm_imem *ipc_imem) ul_list = &channel->ul_list; /* Fill the transfer descriptor with the uplink buffer info. */ - hpda_pending |= ipc_protocol_ul_td_send(ipc_imem->ipc_protocol, + if (!ipc_imem_check_wwan_ips(channel)) { + hpda_ctrl_pending |= + ipc_protocol_ul_td_send(ipc_imem->ipc_protocol, pipe, ul_list); - - /* forced HP update needed for non data channels */ - if (hpda_pending && !ipc_imem_check_wwan_ips(channel)) - forced_hpdu = true; + } else { + hpda_pending |= + ipc_protocol_ul_td_send(ipc_imem->ipc_protocol, + pipe, ul_list); + } } - if (forced_hpdu) { + /* forced HP update needed for non data channels */ + if (hpda_ctrl_pending) { hpda_pending = false; ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol, IPC_HP_UL_WRITE_TD); From 07d3f2743decaeefcf076457719ae01c8b43b6d2 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 9 Dec 2021 15:46:28 +0530 Subject: [PATCH 250/253] net: wwan: iosm: fixes net interface nonfunctional after fw flash Devlink initialization flow was overwriting the IP traffic channel configuration. This was causing wwan0 network interface to be unusable after fw flash. When device boots to fully functional mode restore the IP channel configuration. Signed-off-by: M Chetan Kumar Reviewed-by: Sergey Ryazanov Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 7 ++++++- drivers/net/wwan/iosm/iosm_ipc_imem.h | 1 + drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index b4d47b31ba91..e2c096863488 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -531,6 +531,9 @@ static void ipc_imem_run_state_worker(struct work_struct *instance) return; } + if (test_and_clear_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag)) + ipc_devlink_deinit(ipc_imem->ipc_devlink); + if (!ipc_imem_setup_cp_mux_cap_init(ipc_imem, &mux_cfg)) ipc_imem->mux = ipc_mux_init(&mux_cfg, ipc_imem); @@ -1171,7 +1174,7 @@ void ipc_imem_cleanup(struct iosm_imem *ipc_imem) ipc_port_deinit(ipc_imem->ipc_port); } - if (ipc_imem->ipc_devlink) + if (test_and_clear_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag)) ipc_devlink_deinit(ipc_imem->ipc_devlink); ipc_imem_device_ipc_uninit(ipc_imem); @@ -1335,6 +1338,8 @@ struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id, if (ipc_flash_link_establish(ipc_imem)) goto devlink_channel_fail; + + set_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag); } return ipc_imem; devlink_channel_fail: diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h index 6be6708b4eec..6b479fe23a42 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.h +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h @@ -101,6 +101,7 @@ struct ipc_chnl_cfg; #define IOSM_CHIP_INFO_SIZE_MAX 100 #define FULLY_FUNCTIONAL 0 +#define IOSM_DEVLINK_INIT 1 /* List of the supported UL/DL pipes. */ enum ipc_mem_pipes { diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index 825e8e5ffb2a..09261fbb79c1 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -450,6 +450,7 @@ void ipc_imem_sys_devlink_close(struct iosm_devlink *ipc_devlink) /* Release the pipe resources */ ipc_imem_pipe_cleanup(ipc_imem, &channel->ul_pipe); ipc_imem_pipe_cleanup(ipc_imem, &channel->dl_pipe); + ipc_imem->nr_of_channels--; } void ipc_imem_sys_devlink_notify_rx(struct iosm_devlink *ipc_devlink, From 383451ceb07831d37dafdf011c09366d1c034df5 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 9 Dec 2021 15:46:29 +0530 Subject: [PATCH 251/253] net: wwan: iosm: fixes unable to send AT command during mbim tx ev_cdev_write_pending flag is preventing a TX message post for AT port while MBIM transfer is ongoing. Removed the unnecessary check around control port TX transfer. Signed-off-by: M Chetan Kumar Reviewed-by: Sergey Ryazanov Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 1 - drivers/net/wwan/iosm/iosm_ipc_imem.h | 3 --- drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 6 ------ 3 files changed, 10 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index e2c096863488..12c03dacb5dd 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -1270,7 +1270,6 @@ struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id, ipc_imem->pci_device_id = device_id; - ipc_imem->ev_cdev_write_pending = false; ipc_imem->cp_version = 0; ipc_imem->device_sleep = IPC_HOST_SLEEP_ENTER_SLEEP; diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h index 6b479fe23a42..6b8a837faef2 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.h +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h @@ -336,8 +336,6 @@ enum ipc_phase { * process the irq actions. * @flag: Flag to monitor the state of driver * @td_update_timer_suspended: if true then td update timer suspend - * @ev_cdev_write_pending: 0 means inform the IPC tasklet to pass - * the accumulated uplink buffers to CP. * @ev_mux_net_transmit_pending:0 means inform the IPC tasklet to pass * @reset_det_n: Reset detect flag * @pcie_wake_n: Pcie wake flag @@ -375,7 +373,6 @@ struct iosm_imem { u8 ev_irq_pending[IPC_IRQ_VECTORS]; unsigned long flag; u8 td_update_timer_suspended:1, - ev_cdev_write_pending:1, ev_mux_net_transmit_pending:1, reset_det_n:1, pcie_wake_n:1; diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index 09261fbb79c1..831cdae28e8a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -41,7 +41,6 @@ void ipc_imem_sys_wwan_close(struct iosm_imem *ipc_imem, int if_id, static int ipc_imem_tq_cdev_write(struct iosm_imem *ipc_imem, int arg, void *msg, size_t size) { - ipc_imem->ev_cdev_write_pending = false; ipc_imem_ul_send(ipc_imem); return 0; @@ -50,11 +49,6 @@ static int ipc_imem_tq_cdev_write(struct iosm_imem *ipc_imem, int arg, /* Through tasklet to do sio write. */ static int ipc_imem_call_cdev_write(struct iosm_imem *ipc_imem) { - if (ipc_imem->ev_cdev_write_pending) - return -1; - - ipc_imem->ev_cdev_write_pending = true; - return ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_cdev_write, 0, NULL, 0, false); } From 04ec4e6250e5f58b525b08f3dca45c7d7427620e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 9 Dec 2021 09:26:47 +0000 Subject: [PATCH 252/253] net: dsa: mv88e6xxx: allow use of PHYs on CPU and DSA ports Martyn Welch reports that his CPU port is unable to link where it has been necessary to use one of the switch ports with an internal PHY for the CPU port. The reason behind this is the port control register is left forcing the link down, preventing traffic flow. This occurs because during initialisation, phylink expects the link to be down, and DSA forces the link down by synthesising a call to the DSA drivers phylink_mac_link_down() method, but we don't touch the forced-link state when we later reconfigure the port. Resolve this by also unforcing the link state when we are operating in PHY mode and the PPU is set to poll the PHY to retrieve link status information. Reported-by: Martyn Welch Tested-by: Martyn Welch Fixes: 3be98b2d5fbc ("net: dsa: Down cpu/dsa ports phylink will control") Cc: # 5.7: 2b29cb9e3f7f: net: dsa: mv88e6xxx: fix "don't use PHY_DETECT on internal PHY's" Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1mvFhP-00F8Zb-Ul@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 62 +++++++++++++++++--------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9f675464efc3..14f87f6ac479 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -698,44 +698,48 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, { struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_port *p; - int err; + int err = 0; p = &chip->ports[port]; - /* FIXME: is this the correct test? If we're in fixed mode on an - * internal port, why should we process this any different from - * PHY mode? On the other hand, the port may be automedia between - * an internal PHY and the serdes... - */ - if ((mode == MLO_AN_PHY) && mv88e6xxx_phy_is_internal(ds, port)) - return; - mv88e6xxx_reg_lock(chip); - /* In inband mode, the link may come up at any time while the link - * is not forced down. Force the link down while we reconfigure the - * interface mode. - */ - if (mode == MLO_AN_INBAND && p->interface != state->interface && - chip->info->ops->port_set_link) - chip->info->ops->port_set_link(chip, port, LINK_FORCED_DOWN); - err = mv88e6xxx_port_config_interface(chip, port, state->interface); - if (err && err != -EOPNOTSUPP) - goto err_unlock; + if (mode != MLO_AN_PHY || !mv88e6xxx_phy_is_internal(ds, port)) { + /* In inband mode, the link may come up at any time while the + * link is not forced down. Force the link down while we + * reconfigure the interface mode. + */ + if (mode == MLO_AN_INBAND && + p->interface != state->interface && + chip->info->ops->port_set_link) + chip->info->ops->port_set_link(chip, port, + LINK_FORCED_DOWN); - err = mv88e6xxx_serdes_pcs_config(chip, port, mode, state->interface, - state->advertising); - /* FIXME: we should restart negotiation if something changed - which - * is something we get if we convert to using phylinks PCS operations. - */ - if (err > 0) - err = 0; + err = mv88e6xxx_port_config_interface(chip, port, + state->interface); + if (err && err != -EOPNOTSUPP) + goto err_unlock; + + err = mv88e6xxx_serdes_pcs_config(chip, port, mode, + state->interface, + state->advertising); + /* FIXME: we should restart negotiation if something changed - + * which is something we get if we convert to using phylinks + * PCS operations. + */ + if (err > 0) + err = 0; + } /* Undo the forced down state above after completing configuration - * irrespective of its state on entry, which allows the link to come up. + * irrespective of its state on entry, which allows the link to come + * up in the in-band case where there is no separate SERDES. Also + * ensure that the link can come up if the PPU is in use and we are + * in PHY mode (we treat the PPU as an effective in-band mechanism.) */ - if (mode == MLO_AN_INBAND && p->interface != state->interface && - chip->info->ops->port_set_link) + if (chip->info->ops->port_set_link && + ((mode == MLO_AN_INBAND && p->interface != state->interface) || + (mode == MLO_AN_PHY && mv88e6xxx_port_ppu_updates(chip, port)))) chip->info->ops->port_set_link(chip, port, LINK_UNFORCED); p->interface = state->interface; From 3a49cc22d31eccceb856f468be0646faa2d4643f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 9 Dec 2021 11:51:13 -0500 Subject: [PATCH 253/253] tools/lib/lockdep: drop leftover liblockdep headers Clean up remaining headers that are specific to liblockdep but lived in the shared header directory. These are all unused after the liblockdep code was removed in commit 7246f4dcaccc ("tools/lib/lockdep: drop liblockdep"). Note that there are still headers that were originally created for liblockdep, that still have liblockdep references, but they are used by other tools/ code at this point. Signed-off-by: Sasha Levin Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- tools/include/linux/debug_locks.h | 14 ------ tools/include/linux/hardirq.h | 12 ------ tools/include/linux/irqflags.h | 39 ----------------- tools/include/linux/lockdep.h | 72 ------------------------------- tools/include/linux/proc_fs.h | 4 -- tools/include/linux/spinlock.h | 2 - tools/include/linux/stacktrace.h | 33 -------------- 7 files changed, 176 deletions(-) delete mode 100644 tools/include/linux/debug_locks.h delete mode 100644 tools/include/linux/hardirq.h delete mode 100644 tools/include/linux/irqflags.h delete mode 100644 tools/include/linux/lockdep.h delete mode 100644 tools/include/linux/proc_fs.h delete mode 100644 tools/include/linux/stacktrace.h diff --git a/tools/include/linux/debug_locks.h b/tools/include/linux/debug_locks.h deleted file mode 100644 index 72d595ce764a..000000000000 --- a/tools/include/linux/debug_locks.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_ -#define _LIBLOCKDEP_DEBUG_LOCKS_H_ - -#include -#include -#include - -#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x) - -extern bool debug_locks; -extern bool debug_locks_silent; - -#endif diff --git a/tools/include/linux/hardirq.h b/tools/include/linux/hardirq.h deleted file mode 100644 index b25580b6a9be..000000000000 --- a/tools/include/linux/hardirq.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_ -#define _LIBLOCKDEP_LINUX_HARDIRQ_H_ - -#define SOFTIRQ_BITS 0UL -#define HARDIRQ_BITS 0UL -#define SOFTIRQ_SHIFT 0UL -#define HARDIRQ_SHIFT 0UL -#define hardirq_count() 0UL -#define softirq_count() 0UL - -#endif diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h deleted file mode 100644 index 501262aee8ff..000000000000 --- a/tools/include/linux/irqflags.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ - -# define lockdep_hardirq_context() 0 -# define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled() 0 -# define lockdep_softirqs_enabled(p) 0 -# define lockdep_hardirq_enter() do { } while (0) -# define lockdep_hardirq_exit() do { } while (0) -# define lockdep_softirq_enter() do { } while (0) -# define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS - -# define stop_critical_timings() do { } while (0) -# define start_critical_timings() do { } while (0) - -#define raw_local_irq_disable() do { } while (0) -#define raw_local_irq_enable() do { } while (0) -#define raw_local_irq_save(flags) ((flags) = 0) -#define raw_local_irq_restore(flags) ((void)(flags)) -#define raw_local_save_flags(flags) ((flags) = 0) -#define raw_irqs_disabled_flags(flags) ((void)(flags)) -#define raw_irqs_disabled() 0 -#define raw_safe_halt() - -#define local_irq_enable() do { } while (0) -#define local_irq_disable() do { } while (0) -#define local_irq_save(flags) ((flags) = 0) -#define local_irq_restore(flags) ((void)(flags)) -#define local_save_flags(flags) ((flags) = 0) -#define irqs_disabled() (1) -#define irqs_disabled_flags(flags) ((void)(flags), 0) -#define safe_halt() do { } while (0) - -#define trace_lock_release(x, y) -#define trace_lock_acquire(a, b, c, d, e, f, g) - -#endif diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h deleted file mode 100644 index e56997288f2b..000000000000 --- a/tools/include/linux/lockdep.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LOCKDEP_H_ -#define _LIBLOCKDEP_LOCKDEP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_LOCK_DEPTH 63UL - -#define asmlinkage -#define __visible - -#include "../../../include/linux/lockdep.h" - -struct task_struct { - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; - int pid; - int state; - char comm[17]; -}; - -#define TASK_RUNNING 0 - -extern struct task_struct *__curr(void); - -#define current (__curr()) - -static inline int debug_locks_off(void) -{ - return 1; -} - -#define task_pid_nr(tsk) ((tsk)->pid) - -#define KSYM_NAME_LEN 128 -#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) -#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define pr_warn pr_err -#define pr_cont pr_err - -#define list_del_rcu list_del - -#define atomic_t unsigned long -#define atomic_inc(x) ((*(x))++) - -#define print_tainted() "" -#define static_obj(x) 1 - -#define debug_show_all_locks() -extern void debug_check_no_locks_held(void); - -static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr) -{ - return false; -} - -#endif diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h deleted file mode 100644 index 8b3b03b64fda..000000000000 --- a/tools/include/linux/proc_fs.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H -#define _TOOLS_INCLUDE_LINUX_PROC_FS_H - -#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index c934572d935c..622266b197d0 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -37,6 +37,4 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) return true; } -#include - #endif diff --git a/tools/include/linux/stacktrace.h b/tools/include/linux/stacktrace.h deleted file mode 100644 index ae343ac35bfa..000000000000 --- a/tools/include/linux/stacktrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_ -#define _LIBLOCKDEP_LINUX_STACKTRACE_H_ - -#include - -struct stack_trace { - unsigned int nr_entries, max_entries; - unsigned long *entries; - int skip; -}; - -static inline void print_stack_trace(struct stack_trace *trace, int spaces) -{ - backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1); -} - -#define save_stack_trace(trace) \ - ((trace)->nr_entries = \ - backtrace((void **)(trace)->entries, (trace)->max_entries)) - -static inline int dump_stack(void) -{ - void *array[64]; - size_t size; - - size = backtrace(array, 64); - backtrace_symbols_fd(array, size, 1); - - return 0; -} - -#endif