Arm64 Linux 下页表 init_idmap_pg_dir的使用

二 21 五月 2024 | tags: Linux

u-boot或者uefi启动kernel的时候,默认MMU是关闭的,所以这个时候kernel实际运行的地址就是物理地址。 为了前期尽早打开MMU和cache,kernel需要建立MMU的页表,这个时候的页表是1:1映射,即VA = PA。

分配

这个页表是在链接脚本(vmlinux.lds.S)中分配的,参考代码:

/*
 * Reserve INIT_IDMAP_DIR_SIZE bytes at the current location counter.
 * init_idmap_pg_dir marks the start of the reserved area and
 * init_idmap_pg_end marks the first byte past it.
 */
init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
init_idmap_pg_end = .;

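它的大小是 INIT_IDMAP_DIR_SIZE。为什么在 VA_BITS 小于 48 的时候需要 +2 个 page? 因为这种配置下 1:1 映射(ID map)可能需要被扩展:kernel 所在的物理地址有可能超出 VA_BITS 位所能覆盖的范围,此时需要给 ID map 增加额外的页表级数,最多会多用两个页表页。所以 +2 表示在基础页数 INIT_IDMAP_DIR_PAGES 的基础上再预留两个页面,以保证扩展 ID map 时有足够的空间。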

/*
 * Size in bytes of the area reserved for the initial identity-map page
 * tables: INIT_IDMAP_DIR_PAGES pages, plus two additional pages when
 * VA_BITS < 48.
 * NOTE(review): the two extra pages are presumably for the case where the
 * ID map has to be extended -- confirm against asm/kernel-pgtable.h.
 */
#if VA_BITS < 48
#define INIT_IDMAP_DIR_SIZE     ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
#else
#define INIT_IDMAP_DIR_SIZE     (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
#endif

创建

在函数 create_idmap 中会创建从 _text 到 _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE 的 1:1 映射。 从启动到正式的页表建立之前,kernel 除了写 device tree 和正式的页表之外,不会写任何其他数据。 所以默认把整个区域映射成 SWAPPER_RX_MMUFLAGS,然后再把 FDT 和 init_pg_dir 的 region 设置成 SWAPPER_RW_MMUFLAGS。

建立完成之后,在 __enable_mmu 中会去使能 MMU,此时 TTBR0 = init_idmap_pg_dir,TTBR1 = reserved_pg_dir。

MMU的页表如下:

>mmu print EL1N_S1_TTBR0_EL1 TTBR0_EL1=init_idmap_pg_dir
Input Address   | Type           | Next Level            | Output Address        | Properties
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0x00000000    | TTBR0_EL1      | NP:0x0000000085A90000 |                       | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
 + 0x00000000   | Level 0 Table  | NP:0x0000000085A91000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
  - 0x00000000  | Invalid        |                       |                       |
  - 0x40000000  | Invalid        |                       |                       |
  + 0x80000000  | Level 1 Table  | NP:0x0000000085A92000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
   - 0x80000000 | Invalid (x32)  |                       |                       |
   - 0x84000000 | Level 2 Block  |                       | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x84200000 | Level 2 Block  |                       | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x84400000 | Level 2 Block  |                       | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x84600000 | Level 2 Block  |                       | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x84800000 | Level 2 Block  |                       | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x84A00000 | Level 2 Block  |                       | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x84C00000 | Level 2 Block  |                       | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x84E00000 | Level 2 Block  |                       | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85000000 | Level 2 Block  |                       | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85200000 | Level 2 Block  |                       | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85400000 | Level 2 Block  |                       | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85600000 | Level 2 Block  |                       | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85800000 | Level 2 Block  |                       | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85A00000 | Level 2 Block  |                       | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85C00000 | Level 2 Block  |                       | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x85E00000 | Level 2 Block  |                       | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x86000000 | Level 2 Block  |                       | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x86200000 | Level 2 Block  |                       | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
   - 0x86400000 | Level 2 Block  |                       | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0x86600000 | Level 2 Block  |                       | NP:0x0000000082000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0x86800000 | Level 2 Block  |                       | NP:0x0000000082200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0x86A00000 | Invalid (x459) |                       |                       |
  - 0xC0000000  | Invalid (x509) |                       |                       |
 - 0x8000000000 | Invalid (x511) |                       |                       |

设置TTBR1

这个时候TTBR1还是指向reserved_pg_dir,为了建立正式的VA到PA的映射,在函数:create_kernel_mapping 中会把整个kernel image 建立VA 到PA的映射:

/*
 * create_kernel_mapping - build the kernel image mapping in init_pg_dir.
 *
 * Computes the runtime virtual range of the image, starting at KIMAGE_VADDR
 * (plus the displacement held in x23 when CONFIG_RELOCATABLE is set) and
 * spanning _end - _text bytes, then hands it to the map_memory macro with
 * SWAPPER_RW_MMUFLAGS.
 *
 * NOTE(review): map_memory is defined elsewhere. From the operands, x0 is
 * the table base, x5/x6 the virtual start/end, x7 the flags and x3 the
 * physical start; the remaining registers are presumably scratch --
 * confirm against the macro definition.
 */
SYM_FUNC_START_LOCAL(create_kernel_mapping)
        // x0 = address of the page table that receives the mapping
        adrp    x0, init_pg_dir
        mov_q   x5, KIMAGE_VADDR                // compile time __va(_text)
#ifdef CONFIG_RELOCATABLE
        add     x5, x5, x23                     // add KASLR displacement
#endif
        adrp    x6, _end                        // runtime __pa(_end)
        adrp    x3, _text                       // runtime __pa(_text)
        sub     x6, x6, x3                      // _end - _text
        add     x6, x6, x5                      // runtime __va(_end)
        // every entry of this mapping uses the same (RW) attribute set
        mov_q   x7, SWAPPER_RW_MMUFLAGS

        map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14

        dsb     ishst                           // sync with page table walker
        ret
SYM_FUNC_END(create_kernel_mapping)

因为这个也不是最终的页表,所以我们看到属性全都设置成了 SWAPPER_RW_MMUFLAGS。 后面这个页表会被 swapper_pg_dir 给替换掉。 此时 TTBR1 对应的 MMU 页表如下:

>mmu print EL1N_S1_TTBR1_EL1 TTBR1_EL1=init_pg_dir
Input Address           | Type           | Next Level            | Output Address        | Properties
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0xFFFF000000000000    | TTBR1_EL1      | NP:0x00000000865DA000 |                       | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
 - 0xFFFF000000000000   | Invalid (x256) |                       |                       |
 + 0xFFFF800000000000   | Level 0 Table  | NP:0x00000000865DB000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
  + 0xFFFF800000000000  | Level 1 Table  | NP:0x00000000865DC000 |                       | APTable=0x0, UXNTable=0, PXNTable=0
   - 0xFFFF800000000000 | Invalid (x64)  |                       |                       |
   - 0xFFFF800008000000 | Level 2 Block  |                       | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008200000 | Level 2 Block  |                       | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008400000 | Level 2 Block  |                       | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008600000 | Level 2 Block  |                       | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008800000 | Level 2 Block  |                       | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008A00000 | Level 2 Block  |                       | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008C00000 | Level 2 Block  |                       | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800008E00000 | Level 2 Block  |                       | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009000000 | Level 2 Block  |                       | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009200000 | Level 2 Block  |                       | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009400000 | Level 2 Block  |                       | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009600000 | Level 2 Block  |                       | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009800000 | Level 2 Block  |                       | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009A00000 | Level 2 Block  |                       | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009C00000 | Level 2 Block  |                       | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF800009E00000 | Level 2 Block  |                       | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A000000 | Level 2 Block  |                       | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A200000 | Level 2 Block  |                       | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A400000 | Level 2 Block  |                       | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
   - 0xFFFF80000A600000 | Invalid (x429) |                       |                       |
  - 0xFFFF800040000000  | Invalid (x511) |                       |                       |
 - 0xFFFF808000000000   | Invalid (x255) |                       |                       |

卸载init_idmap_pg_dir

在函数__primary_switch,kernel会从1:1 的VA 切换到正式的真正的VA:

/*
 * __primary_switch - turn the MMU on, build the kernel mapping and jump to
 * __primary_switched.
 *
 * Sequence, as visible here:
 *   1. call __enable_mmu with x1 = reserved_pg_dir, x2 = init_idmap_pg_dir
 *   2. (CONFIG_RELOCATABLE) compute the kernel offset into x23; with
 *      CONFIG_RANDOMIZE_BASE, fold in the value returned by
 *      __pi_kaslr_early_init
 *   3. clear_page_tables + create_kernel_mapping
 *   4. load init_pg_dir via the load_ttbr1 macro
 *   5. (CONFIG_RELOCATABLE) __relocate_kernel
 *   6. branch to __primary_switched with x0 = __pa(KERNEL_START)
 */
SYM_FUNC_START_LOCAL(__primary_switch)
        // NOTE(review): presumably x1 becomes TTBR1 and x2 becomes TTBR0
        // inside __enable_mmu -- confirm against its definition.
        adrp    x1, reserved_pg_dir
        adrp    x2, init_idmap_pg_dir
        bl      __enable_mmu
#ifdef CONFIG_RELOCATABLE
        // x23 = bits of the load address of KERNEL_START below MIN_KIMG_ALIGN
        adrp    x23, KERNEL_START
        and     x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
        // NOTE(review): x22 is set up before this function; presumably the
        // FDT pointer -- confirm against the caller.
        mov     x0, x22
        // use the end of the init_pg_dir area as a temporary stack for the call
        adrp    x1, init_pg_end
        mov     sp, x1
        // zero the frame pointer before calling out
        mov     x29, xzr
        bl      __pi_kaslr_early_init
        and     x24, x0, #SZ_2M - 1             // capture memstart offset seed
        // drop the low bits of the returned value, keeping a 2 MiB multiple
        bic     x0, x0, #SZ_2M - 1
        orr     x23, x23, x0                    // record kernel offset
#endif
#endif
        bl      clear_page_tables
        bl      create_kernel_mapping

        // install init_pg_dir through the load_ttbr1 macro (x2 is a temporary)
        adrp    x1, init_pg_dir
        load_ttbr1 x1, x1, x2
#ifdef CONFIG_RELOCATABLE
        bl      __relocate_kernel
#endif
        // literal-pool load of the symbol's address, then an indirect branch:
        // presumably this is the jump from the identity map to the kernel VA
        ldr     x8, =__primary_switched
        adrp    x0, KERNEL_START                // __pa(KERNEL_START)
        br      x8
SYM_FUNC_END(__primary_switch)

这个时候1:1 的mapping就基本不用了,就在setup_arch函数调用cpu_uninstall_idmap把TTBR0设置成reserved_pg_dir

/*
 * cpu_uninstall_idmap - stop using the identity map on this CPU.
 *
 * Calls cpu_set_reserved_ttbr0(), flushes the local TLB, and calls
 * cpu_set_default_tcr_t0sz(). If the task's active mm is not init_mm and
 * system_uses_ttbr0_pan() is false, the active mm's page tables are then
 * installed with cpu_switch_mm().
 */
static inline void cpu_uninstall_idmap(void)
{
        struct mm_struct *mm = current->active_mm;

        /* Point TTBR0 away from the idmap before invalidating stale entries. */
        cpu_set_reserved_ttbr0();
        local_flush_tlb_all();
        /* NOTE(review): presumably restores the T0SZ used for normal user mappings -- confirm. */
        cpu_set_default_tcr_t0sz();

        /* Only re-install page tables for a non-init mm, and only without TTBR0 PAN. */
        if (mm != &init_mm && !system_uses_ttbr0_pan())
                cpu_switch_mm(mm->pgd, mm);
}

接下来还会在 paging_init 里面再次使用 init_idmap_pg_dir:因为我们要切换 TTBR1 对应的 MMU 页表,切换过程中不能运行在由 TTBR1 映射的函数上,需要通过 idmap(1:1 映射)来执行切换函数,才能替换 TTBR1 所对应的页表。 参考函数:

/*
 * __cpu_replace_ttbr1 - switch TTBR1 to a new page table via the identity map.
 * @pgdp:  page table whose physical address is converted to the new TTBR1 value
 * @idmap: identity-map page table installed for the duration of the switch
 * @cnp:   when true, TTBR_CNP_BIT is OR-ed into the TTBR1 value
 *
 * idmap_cpu_replace_ttbr1 is called through its physical address
 * (__pa_symbol) after @idmap has been installed, with all exceptions masked
 * around the call. The idmap is uninstalled again before returning.
 */
static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
{
        typedef void (ttbr_replace_func)(phys_addr_t);
        extern ttbr_replace_func idmap_cpu_replace_ttbr1;
        ttbr_replace_func *replace_phys;
        unsigned long daif;

        /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
        phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));

        if (cnp)
                ttbr1 |= TTBR_CNP_BIT;

        /* Function pointer holding the routine's physical address, not its kernel VA. */
        replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);

        /* The idmap must be live before replace_phys can be called. */
        __cpu_install_idmap(idmap);

        /*
         * We really don't want to take *any* exceptions while TTBR1 is
         * in the process of being replaced so mask everything.
         */
        daif = local_daif_save();
        replace_phys(ttbr1);
        local_daif_restore(daif);

        cpu_uninstall_idmap();
}

回收init_idmap_pg_dir

init_idmap_pg_dir 这个页表使用完之后并不是立即回收的。这部分内存位于 __init_begin 到 __init_end 之间,所以会在系统初始化结束之后统一进行回收,回收的调用路径是 kernel_init -> free_initmem,参考下面的链接脚本和函数:

/*
 * Excerpt: init_idmap_pg_dir .. init_idmap_pg_end is placed between
 * __init_begin and __init_end. The "//..." lines stand for elided content.
 */
__init_begin = .;
//...
init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
init_idmap_pg_end = .;
//.....
/* Round the location counter up to SEGMENT_ALIGN before closing the region. */
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
__init_end = .;
/*
 * free_initmem - release the memory between __init_begin and __init_end.
 *
 * Passes the lm_alias() addresses of the region to free_reserved_area()
 * with POISON_FREE_INITMEM and the label "unused kernel", then unmaps the
 * region's kernel-image addresses with vunmap_range().
 * NOTE(review): lm_alias() presumably yields the linear-map alias of a
 * kernel-image symbol -- confirm.
 */
void free_initmem(void)
{
        free_reserved_area(lm_alias(__init_begin),
                           lm_alias(__init_end),
                           POISON_FREE_INITMEM, "unused kernel");
        /*
         * Unmap the __init region but leave the VM area in place. This
         * prevents the region from being reused for kernel modules, which
         * is not supported by kallsyms.
         */
        vunmap_range((u64)__init_begin, (u64)__init_end);
}

参考:

[kernel patch]

Comments !