bianbu-linux-6.6/arch/riscv/kernel/setup.c
Conor Dooley 50e63dd8ed
riscv: fix reserved memory setup
Currently, RISC-V sets up reserved memory using the "early" copy of the
device tree. As a result, when trying to get a reserved memory region
using of_reserved_mem_lookup(), the pointer to reserved memory regions
is using the early, pre-virtual-memory address which causes a kernel
panic when trying to use the buffer's name:

 Unable to handle kernel paging request at virtual address 00000000401c31ac
 Oops [#1]
 Modules linked in:
 CPU: 0 PID: 0 Comm: swapper Not tainted 6.0.0-rc1-00001-g0d9d6953d834 #1
 Hardware name: Microchip PolarFire-SoC Icicle Kit (DT)
 epc : string+0x4a/0xea
  ra : vsnprintf+0x1e4/0x336
 epc : ffffffff80335ea0 ra : ffffffff80338936 sp : ffffffff81203be0
  gp : ffffffff812e0a98 tp : ffffffff8120de40 t0 : 0000000000000000
  t1 : ffffffff81203e28 t2 : 7265736572203a46 s0 : ffffffff81203c20
  s1 : ffffffff81203e28 a0 : ffffffff81203d22 a1 : 0000000000000000
  a2 : ffffffff81203d08 a3 : 0000000081203d21 a4 : ffffffffffffffff
  a5 : 00000000401c31ac a6 : ffff0a00ffffff04 a7 : ffffffffffffffff
  s2 : ffffffff81203d08 s3 : ffffffff81203d00 s4 : 0000000000000008
  s5 : ffffffff000000ff s6 : 0000000000ffffff s7 : 00000000ffffff00
  s8 : ffffffff80d9821a s9 : ffffffff81203d22 s10: 0000000000000002
  s11: ffffffff80d9821c t3 : ffffffff812f3617 t4 : ffffffff812f3617
  t5 : ffffffff812f3618 t6 : ffffffff81203d08
 status: 0000000200000100 badaddr: 00000000401c31ac cause: 000000000000000d
 [<ffffffff80338936>] vsnprintf+0x1e4/0x336
 [<ffffffff80055ae2>] vprintk_store+0xf6/0x344
 [<ffffffff80055d86>] vprintk_emit+0x56/0x192
 [<ffffffff80055ed8>] vprintk_default+0x16/0x1e
 [<ffffffff800563d2>] vprintk+0x72/0x80
 [<ffffffff806813b2>] _printk+0x36/0x50
 [<ffffffff8068af48>] print_reserved_mem+0x1c/0x24
 [<ffffffff808057ec>] paging_init+0x528/0x5bc
 [<ffffffff808031ae>] setup_arch+0xd0/0x592
 [<ffffffff8080070e>] start_kernel+0x82/0x73c

early_init_fdt_scan_reserved_mem() takes no arguments as it operates on
initial_boot_params, which is populated by early_init_dt_verify(). On
RISC-V, early_init_dt_verify() is called twice. Once, directly, in
setup_arch() if CONFIG_BUILTIN_DTB is not enabled and once indirectly,
very early in the boot process, by parse_dtb() when it calls
early_init_dt_scan_nodes().

This first call uses dtb_early_va to set initial_boot_params, which is
not usable later in the boot process when
early_init_fdt_scan_reserved_mem() is called. On arm64 for example, the
corresponding call to early_init_dt_scan_nodes() uses fixmap addresses
and doesn't suffer the same fate.

Move early_init_fdt_scan_reserved_mem() further along the boot sequence,
after the direct call to early_init_dt_verify() in setup_arch() so that
the names use the correct virtual memory addresses. The above supposed
that CONFIG_BUILTIN_DTB was not set, but should work equally in the case
where it is - unflatted_and_copy_device_tree() also updates
initial_boot_params.

Reported-by: Valentina Fernandez <valentina.fernandezalanis@microchip.com>
Reported-by: Evgenii Shatokhin <e.shatokhin@yadro.com>
Link: https://lore.kernel.org/linux-riscv/f8e67f82-103d-156c-deb0-d6d6e2756f5e@microchip.com/
Fixes: 922b0375fc ("riscv: Fix memblock reservation for device tree blob")
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Evgenii Shatokhin <e.shatokhin@yadro.com>
Link: https://lore.kernel.org/r/20221107151524.3941467-1-conor.dooley@microchip.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-11-10 14:46:33 -08:00

331 lines
8.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
* Chen Liqin <liqin.chen@sunplusct.com>
* Lennox Wu <lennox.wu@sunplusct.com>
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2020 FORTH-ICS/CARV
* Nick Kossifidis <mick@ics.forth.gr>
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/sched.h>
#include <linux/console.h>
#include <linux/screen_info.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/sched/task.h>
#include <linux/smp.h>
#include <linux/efi.h>
#include <linux/crash_dump.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
#include <asm/kasan.h>
#include <asm/efi.h>
#include "head.h"
#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI)
struct screen_info screen_info __section(".data") = {
.orig_video_lines = 30,
.orig_video_cols = 80,
.orig_video_mode = 0,
.orig_video_ega_bx = 0,
.orig_video_isVGA = 1,
.orig_video_points = 8
};
#endif
/*
* The lucky hart to first increment this variable will boot the other cores.
* This is used before the kernel initializes the BSS so it can't be in the
* BSS.
*/
atomic_t hart_lottery __section(".sdata")
#ifdef CONFIG_XIP_KERNEL
= ATOMIC_INIT(0xC001BEEF)
#endif
;
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
/*
* Place kernel memory regions on the resource tree so that
* kexec-tools can retrieve them from /proc/iomem. While there
* also add "System RAM" regions for compatibility with other
* archs, and the rest of the known regions for completeness.
*/
static struct resource kimage_res = { .name = "Kernel image", };
static struct resource code_res = { .name = "Kernel code", };
static struct resource data_res = { .name = "Kernel data", };
static struct resource rodata_res = { .name = "Kernel rodata", };
static struct resource bss_res = { .name = "Kernel bss", };
#ifdef CONFIG_CRASH_DUMP
static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
#endif
static int __init add_resource(struct resource *parent,
struct resource *res)
{
int ret = 0;
ret = insert_resource(parent, res);
if (ret < 0) {
pr_err("Failed to add a %s resource at %llx\n",
res->name, (unsigned long long) res->start);
return ret;
}
return 1;
}
static int __init add_kernel_resources(void)
{
int ret = 0;
/*
* The memory region of the kernel image is continuous and
* was reserved on setup_bootmem, register it here as a
* resource, with the various segments of the image as
* child nodes.
*/
code_res.start = __pa_symbol(_text);
code_res.end = __pa_symbol(_etext) - 1;
code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
rodata_res.start = __pa_symbol(__start_rodata);
rodata_res.end = __pa_symbol(__end_rodata) - 1;
rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
data_res.start = __pa_symbol(_data);
data_res.end = __pa_symbol(_edata) - 1;
data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
bss_res.start = __pa_symbol(__bss_start);
bss_res.end = __pa_symbol(__bss_stop) - 1;
bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
kimage_res.start = code_res.start;
kimage_res.end = bss_res.end;
kimage_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
ret = add_resource(&iomem_resource, &kimage_res);
if (ret < 0)
return ret;
ret = add_resource(&kimage_res, &code_res);
if (ret < 0)
return ret;
ret = add_resource(&kimage_res, &rodata_res);
if (ret < 0)
return ret;
ret = add_resource(&kimage_res, &data_res);
if (ret < 0)
return ret;
ret = add_resource(&kimage_res, &bss_res);
return ret;
}
static void __init init_resources(void)
{
struct memblock_region *region = NULL;
struct resource *res = NULL;
struct resource *mem_res = NULL;
size_t mem_res_sz = 0;
int num_resources = 0, res_idx = 0;
int ret = 0;
/* + 1 as memblock_alloc() might increase memblock.reserved.cnt */
num_resources = memblock.memory.cnt + memblock.reserved.cnt + 1;
res_idx = num_resources - 1;
mem_res_sz = num_resources * sizeof(*mem_res);
mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
if (!mem_res)
panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
/*
* Start by adding the reserved regions, if they overlap
* with /memory regions, insert_resource later on will take
* care of it.
*/
ret = add_kernel_resources();
if (ret < 0)
goto error;
#ifdef CONFIG_KEXEC_CORE
if (crashk_res.start != crashk_res.end) {
ret = add_resource(&iomem_resource, &crashk_res);
if (ret < 0)
goto error;
}
#endif
#ifdef CONFIG_CRASH_DUMP
if (elfcorehdr_size > 0) {
elfcorehdr_res.start = elfcorehdr_addr;
elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1;
elfcorehdr_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
add_resource(&iomem_resource, &elfcorehdr_res);
}
#endif
for_each_reserved_mem_region(region) {
res = &mem_res[res_idx--];
res->name = "Reserved";
res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE;
res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1;
/*
* Ignore any other reserved regions within
* system memory.
*/
if (memblock_is_memory(res->start)) {
/* Re-use this pre-allocated resource */
res_idx++;
continue;
}
ret = add_resource(&iomem_resource, res);
if (ret < 0)
goto error;
}
/* Add /memory regions to the resource tree */
for_each_mem_region(region) {
res = &mem_res[res_idx--];
if (unlikely(memblock_is_nomap(region))) {
res->name = "Reserved";
res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE;
} else {
res->name = "System RAM";
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
}
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
ret = add_resource(&iomem_resource, res);
if (ret < 0)
goto error;
}
/* Clean-up any unused pre-allocated resources */
if (res_idx >= 0)
memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res));
return;
error:
/* Better an empty resource tree than an inconsistent one */
release_child_resources(&iomem_resource);
memblock_free(mem_res, mem_res_sz);
}
static void __init parse_dtb(void)
{
/* Early scan of device tree from init memory */
if (early_init_dt_scan(dtb_early_va)) {
const char *name = of_flat_dt_get_machine_name();
if (name) {
pr_info("Machine model: %s\n", name);
dump_stack_set_arch_desc("%s (DT)", name);
}
} else {
pr_err("No DTB passed to the kernel\n");
}
#ifdef CONFIG_CMDLINE_FORCE
strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
pr_info("Forcing kernel command line to: %s\n", boot_command_line);
#endif
}
void __init setup_arch(char **cmdline_p)
{
parse_dtb();
setup_initial_init_mm(_stext, _etext, _edata, _end);
*cmdline_p = boot_command_line;
early_ioremap_setup();
jump_label_init();
parse_early_param();
efi_init();
paging_init();
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa))))
unflatten_device_tree();
else
pr_err("No DTB found in kernel mappings\n");
#endif
early_init_fdt_scan_reserved_mem();
misc_mem_init();
init_resources();
sbi_init();
#ifdef CONFIG_KASAN
kasan_init();
#endif
#ifdef CONFIG_SMP
setup_smp();
#endif
riscv_init_cbom_blocksize();
riscv_fill_hwcap();
apply_boot_alternatives();
}
static int __init topology_init(void)
{
int i, ret;
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_devices, i);
cpu->hotpluggable = cpu_has_hotplug(i);
ret = register_cpu(cpu, i);
if (unlikely(ret))
pr_warn("Warning: %s: register_cpu %d failed (%d)\n",
__func__, i, ret);
}
return 0;
}
subsys_initcall(topology_init);
void free_initmem(void)
{
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
IS_ENABLED(CONFIG_64BIT) ?
set_memory_rw : set_memory_rw_nx);
free_initmem_default(POISON_FREE_INITMEM);
}