FDT的区域的Map过程

三 22 五月 2024 | tags: Linux

u-boot或者uefi启动Linux的时候,会把device tree 所在的地址通过x0寄存器传给Linux。 kernel在boot阶段会使用到x0寄存器,所以在primary_entry -> preserve_boot_args 会把device tree的地址保存到x21寄存器:

SYM_CODE_START_LOCAL(preserve_boot_args)
        mov     x21, x0                         // x21=FDT

1:1 mapping阶段

在init_idmap_pg_dir阶段,会把x21中保存的FDT物理地址mapping到kernel结尾之后2M对齐的地方,所以这个地方其实不是1:1的映射。 参考:primary_entry -> create_idmap:

/* Remap the FDT after the kernel image */
adrp    x1, _text
adrp    x22, _end + SWAPPER_BLOCK_SIZE
bic     x2, x22, #SWAPPER_BLOCK_SIZE - 1
bfi     x22, x21, #0, #SWAPPER_BLOCK_SHIFT              // remapped FDT address
add     x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
bic     x4, x21, #SWAPPER_BLOCK_SIZE - 1
mov_q   x5, SWAPPER_RW_MMUFLAGS
mov     x6, #SWAPPER_BLOCK_SHIFT
bl      remap_region

可以发现,x22寄存器保存的就是fdt在使用init_idmap_pg_dir页表阶段的VA。在enable了init_idmap_pg_dir 之后,我们只能使用这个VA来访问fdt的内容。 这也就是为什么在__primary_switch中,调用__pi_kaslr_early_init 之前要把x22寄存器赋给x0: kaslr_early_init的第一个参数就是fdt的地址。

SYM_FUNC_START_LOCAL(__primary_switch)
        adrp    x1, reserved_pg_dir
        adrp    x2, init_idmap_pg_dir
        bl      __enable_mmu
#ifdef CONFIG_RELOCATABLE
        adrp    x23, KERNEL_START
        and     x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
        mov     x0, x22
        adrp    x1, init_pg_end
        mov     sp, x1
        mov     x29, xzr
        bl      __pi_kaslr_early_init
        and     x24, x0, #SZ_2M - 1             // capture memstart offset seed
        bic     x0, x0, #SZ_2M - 1
        orr     x23, x23, x0                    // record kernel offset
#endif

这个时候还没有设置好栈,所以不能直接调用C代码。为了能够调用下面的C代码,这里把sp设置为init_pg_end,临时借用这块页表区域作为栈,然后就可以调用如下C代码:

asmlinkage u64 kaslr_early_init(void *fdt)
{
        u64 seed;

        if (is_kaslr_disabled_cmdline(fdt))
                return 0;

init_task 阶段

在这个阶段,x21还是fdt的PA,而x22是对应的VA。在函数__primary_switched 中,首先把x21中的PA存到__fdt_pointer 这个变量中,之后由setup_arch->setup_machine_fdt 把这个PA映射成0xffffxxxxxxxxxxxx形式的地址。

SYM_FUNC_START_LOCAL(__primary_switched)
        adr_l   x4, init_task
        init_cpu_task x4, x5, x6

        adr_l   x8, vectors                     // load VBAR_EL1 with virtual
        msr     vbar_el1, x8                    // vector table address
        isb

        stp     x29, x30, [sp, #-16]!
        mov     x29, sp

        str_l   x21, __fdt_pointer, x5          // Save FDT pointer

在__primary_switched,我们已经把task 切换到init_task,这个时候对应的mm是init_mm:

struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
//...
        .mm             = NULL,
        .active_mm      = &init_mm,
struct mm_struct init_mm = {
        //...
        INIT_MM_CONTEXT(init_mm)
};
#define INIT_MM_CONTEXT(name)   \
        .pgd = init_pg_dir,

可以看到这个task的pgd被初始化成了init_pg_dir,这也就是在调用early_fdt_map 的时候,不需要另外指定页表基地址的原因。 但是init_pg_dir这个区域的大小已经根据kernel image的大小确定好了,如果想要在early_fdt_map 函数里面增加fdt的mapping,那么就需要额外分配页表。 在进行fdt的mapping的时候,根据VA的不同,需要用到中间级或者最后一级的页表,而这个时候memblock还没有ready,所以这些页表只能是静态分配的,参考 arch/arm64/mm/fixmap.c:

/*
 * Statically allocated page tables for the fixmap region: NR_BM_PTE_TABLES
 * pte tables plus one pmd table and one pud table. They are declared as
 * static arrays so that no dynamic allocator is needed to populate them.
 */
static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;

在函数early_fixmap_init 建立FDT页表的时候,使用的都是上面几个静态的页表。调用完 __primary_switched-> early_fdt_map-> early_fixmap_init 之后,可以看到FIXADDR区域的VA还没有和FDT的PA对应起来,页表如下:

>mmu print EL1N_S1_TTBR1_EL1 TTBR1_EL1=0x00000000865DA000
Input Address            | Type           | Next Level            | Output Address        | Properties
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0xFFFF000000000000     | TTBR1_EL1      | NP:0x00000000865DA000 |                       | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
 - 0xFFFF000000000000    | Invalid (x256) |                       |                       |
 + 0xFFFF800000000000    | Level 0 Table  | NP:0x00000000865DB000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
  + 0xFFFF800000000000   | Level 1 Table  | NP:0x00000000865DC000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
   - 0xFFFF800000000000  | Invalid (x64)  |                       |                       |
   - 0xFFFF800008000000  | Level 2 Block  |                       | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008200000  | Level 2 Block  |                       | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008400000  | Level 2 Block  |                       | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008600000  | Level 2 Block  |                       | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008800000  | Level 2 Block  |                       | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008A00000  | Level 2 Block  |                       | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008C00000  | Level 2 Block  |                       | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008E00000  | Level 2 Block  |                       | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009000000  | Level 2 Block  |                       | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009200000  | Level 2 Block  |                       | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009400000  | Level 2 Block  |                       | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009600000  | Level 2 Block  |                       | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009800000  | Level 2 Block  |                       | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009A00000  | Level 2 Block  |                       | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009C00000  | Level 2 Block  |                       | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009E00000  | Level 2 Block  |                       | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A000000  | Level 2 Block  |                       | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A200000  | Level 2 Block  |                       | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A400000  | Level 2 Block  |                       | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A600000  | Invalid (x429) |                       |                       |
  - 0xFFFF800040000000   | Invalid (x511) |                       |                       |
 - 0xFFFF808000000000    | Invalid (x246) |                       |                       |
 + 0xFFFFFB8000000000    | Level 0 Table  | NP:0x0000000086542000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
  - 0xFFFFFB8000000000   | Invalid (x511) |                       |                       |
  + 0xFFFFFBFFC0000000   | Level 1 Table  | NP:0x0000000086543000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
   - 0xFFFFFBFFC0000000  | Invalid (x493) |                       |                       |
   + 0xFFFFFBFFFDA00000  | Level 2 Table  | NP:0x0000000086544000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
    - 0xFFFFFBFFFDA00000 | Invalid (x512) |                       |                       |
   - 0xFFFFFBFFFDC00000  | Invalid (x18)  |                       |                       |
 - 0xFFFFFC0000000000    | Invalid (x8)   |                       |                       |

0x0000000086542000 0x0000000086543000 等地址就是bm_pud, bm_pmd 的地址。 在__primary_switched-> early_fdt_map->fixmap_remap_fdt 调用完成之后,fdt的PA就和VA给对应起来了,页表如下:

>mmu print EL1N_S1_TTBR1_EL1 TTBR1_EL1=0x00000000865DA000
Input Address            | Type           | Next Level            | Output Address        | Properties
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0xFFFF000000000000     | TTBR1_EL1      | NP:0x00000000865DA000 |                       | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
 - 0xFFFF000000000000    | Invalid (x256) |                       |                       |
 + 0xFFFF800000000000    | Level 0 Table  | NP:0x00000000865DB000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
  + 0xFFFF800000000000   | Level 1 Table  | NP:0x00000000865DC000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
   - 0xFFFF800000000000  | Invalid (x64)  |                       |                       |
   - 0xFFFF800008000000  | Level 2 Block  |                       | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008200000  | Level 2 Block  |                       | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008400000  | Level 2 Block  |                       | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008600000  | Level 2 Block  |                       | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008800000  | Level 2 Block  |                       | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008A00000  | Level 2 Block  |                       | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008C00000  | Level 2 Block  |                       | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008E00000  | Level 2 Block  |                       | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009000000  | Level 2 Block  |                       | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009200000  | Level 2 Block  |                       | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009400000  | Level 2 Block  |                       | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009600000  | Level 2 Block  |                       | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009800000  | Level 2 Block  |                       | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009A00000  | Level 2 Block  |                       | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009C00000  | Level 2 Block  |                       | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009E00000  | Level 2 Block  |                       | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A000000  | Level 2 Block  |                       | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A200000  | Level 2 Block  |                       | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A400000  | Level 2 Block  |                       | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A600000  | Invalid (x429) |                       |                       |
  - 0xFFFF800040000000   | Invalid (x511) |                       |                       |
 - 0xFFFF808000000000    | Invalid (x246) |                       |                       |
 + 0xFFFFFB8000000000    | Level 0 Table  | NP:0x0000000086542000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
  - 0xFFFFFB8000000000   | Invalid (x511) |                       |                       |
  + 0xFFFFFBFFC0000000   | Level 1 Table  | NP:0x0000000086543000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
   - 0xFFFFFBFFC0000000  | Invalid (x493) |                       |                       |
   + 0xFFFFFBFFFDA00000  | Level 2 Table  | NP:0x0000000086544000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
    - 0xFFFFFBFFFDA00000 | Invalid (x512) |                       |                       |
   - 0xFFFFFBFFFDC00000  | Level 2 Block  |                       | NP:0x0000000082000000 | UXN=1, PXN=1, Contiguous=0, DBM=1, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFFFBFFFDE00000  | Invalid (x17)  |                       |                       |
 - 0xFFFFFC0000000000    | Invalid (x8)   |                       |                       |

0x0000000082000000 就是之前保存在x21中、再通过x0传给early_fdt_map的FDT物理地址。 代码如下:

SYM_FUNC_START_LOCAL(__primary_switched)
//...
        mov     x0, x21                         // pass FDT address in x0
        bl      early_fdt_map                   // Try mapping the FDT early
        mov     x0, x20                         // pass the full boot status
        bl      init_feature_override           // Parse cpu feature overrides

这个时候映射的FDT的页表,只给函数init_feature_override使用。 这些FDT(也就是fixed map)的页表用完之后也不会释放,会被复制到最终的页表(swapper_pg_dir)里面: setup_arch->paging_init->map_kernel->fixmap_copy:

/*
 * fixmap_copy() - make the existing fixmap page tables reachable from @pgdir.
 * @pgdir: top-level page table that should gain the fixmap mapping.
 *
 * Two cases are handled:
 *  - @pgdir has no pgd entry yet for FIXADDR_TOT_START: the entry returned by
 *    pgd_offset_k(FIXADDR_TOT_START) is copied into @pgdir unchanged.
 *  - @pgdir already has an entry there and CONFIG_PGTABLE_LEVELS > 3: the
 *    pud-level entry below it is populated with bm_pmd instead.
 * Any other combination ends in BUG().
 */
void __init fixmap_copy(pgd_t *pgdir)
{
        if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdir, FIXADDR_TOT_START)))) {
                /*
                 * The fixmap falls in a separate pgd to the kernel, and doesn't
                 * live in the carveout for the swapper_pg_dir. We can simply
                 * re-use the existing dir for the fixmap.
                 */
                set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START),
                        READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START)));
        } else if (CONFIG_PGTABLE_LEVELS > 3) {
                pgd_t *bm_pgdp;
                p4d_t *bm_p4dp;
                pud_t *bm_pudp;
                /*
                 * The fixmap shares its top level pgd entry with the kernel
                 * mapping. This can really only occur when we are running
                 * with 16k/4 levels, so we can simply reuse the pud level
                 * entry instead.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
                bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START);
                bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START);
                /*
                 * NOTE(review): pud_set_fixmap_offset()/pud_clear_fixmap()
                 * presumably map and then unmap the pud table through a
                 * temporary fixmap slot - confirm against their definitions.
                 */
                bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START);
                pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd));
                pud_clear_fixmap();
        } else {
                BUG();
        }
}

setup_arch阶段

函数setup_arch-> early_fixmap_init其实是可以不用调用的,因为在__primary_switched-> early_fdt_map -> early_fixmap_init 已经调用过这个函数,并且这个时候的页表并没有什么变化。

在setup_arch 阶段,这里会最终设置FDT的页表,就是在setup_arch-> setup_machine_fdt -> fixmap_remap_fdt ,但是这个fixmap_remap_fdt 其实不是必须的,因为在上面的流程里面,已经把FDT 的VA->PA给mapping上了。 最终的页表也不会有任何变化。

static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
        int size;
        void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
        const char *name;

最终,在函数 setup_machine_fdt-> early_init_dt_scan-> early_init_dt_verify 中,FDT的VA地址被赋给了变量 initial_boot_params,后面就用这个变量来解析FDT了。

static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
        int size;
        void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
        const char *name;

        if (dt_virt)
                memblock_reserve(dt_phys, size);

        if (!dt_virt || !early_init_dt_scan(dt_virt)) {
                //..
        }

        /* Early fixups are done, map the FDT as read-only now */
        fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);

接下来就是从init_pg_dir切换到swapper_pg_dir这个页表,前面讲过会把FDT的页表接到swapper_pg_dir上,不需要重新映射,调用过程: setup_arch->paging_init->map_kernel->fixmap_copy。 这样FDT就完成了在正式的页表里面的映射。

Comments !