7. linux/init/main.c

I felt guilty writing this chapter as there are too many documents about it, if not more than enough. start_kernel() supporting functions are changed from version to version, as they depend on OS component internals, which are being improved all the time. I may not have the time for frequent document updates, so I decided to keep this chapter as simple as possible.

7.1. start_kernel()

///////////////////////////////////////////////////////////////////////////////
asmlinkage void __init start_kernel(void)
{
        char * command_line;
        extern char saved_command_line[];
/*
 * Interrupts are still disabled. Do necessary setups, then enable them
 */
        lock_kernel();
        printk(linux_banner);

        /* Memory Management in Linux, esp. for setup_arch()
         * Linux-2.4.4 MM Initialization */
        setup_arch(&command_line);
        printk("Kernel command line: %s\n", saved_command_line);

        /* linux/Documentation/kernel-parameters.txt
         * The Linux BootPrompt-HowTo */
        parse_options(command_line);

        trap_init() {
#ifdef CONFIG_EISA
                if (isa_readl(0x0FFFD9) == 'E'+('I'<<8)+('S'<<16)+('A'<<24))
                        EISA_bus = 1;
#endif
#ifdef CONFIG_X86_LOCAL_APIC
                init_apic_mappings();
#endif
                set_xxxx_gate(x, &func);    // setup gates
                cpu_init();
        }
        init_IRQ();
        sched_init();
        softirq_init() {
                for (int i=0; i<32: i++)
                        tasklet_init(bh_task_vec+i, bh_action, i);
                open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
                open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
        }
        time_init();

        /*
         * HACK ALERT! This is early. We're enabling the console before
         * we've done PCI setups etc, and console_init() must be aware of
         * this. But we do want output early, in case something goes wrong.
         */
        console_init();
#ifdef CONFIG_MODULES
        init_modules();
#endif
        if (prof_shift) {
                unsigned int size;
                /* only text is profiled */
                prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
                prof_len >>= prof_shift;
                size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
                prof_buffer = (unsigned int *) alloc_bootmem(size);
        }

        kmem_cache_init();
        sti();

        // BogoMips mini-Howto
        calibrate_delay();

        // linux/Documentation/initrd.txt
#ifdef CONFIG_BLK_DEV_INITRD
        if (initrd_start && !initrd_below_start_ok &&
                        initrd_start < min_low_pfn << PAGE_SHIFT) {
                printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
                    "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
                initrd_start = 0;
        }
#endif

        mem_init();
        kmem_cache_sizes_init();
        pgtable_cache_init();

        /*
         * For architectures that have highmem, num_mappedpages represents
         * the amount of memory the kernel can use.  For other architectures
         * it's the same as the total pages.  We need both numbers because
         * some subsystems need to initialize based on how much memory the
         * kernel can use.
         */
        if (num_mappedpages == 0)
                num_mappedpages =  num_physpages;

        fork_init(num_mempages);
        proc_caches_init();
        vfs_caches_init(num_physpages);
        buffer_init(num_physpages);
        page_cache_init(num_physpages);
#if defined(CONFIG_ARCH_S390)
        ccwcache_init();
#endif
        signals_init();
#ifdef CONFIG_PROC_FS
        proc_root_init();
#endif
#if defined(CONFIG_SYSVIPC)
        ipc_init();
#endif
        check_bugs();
        printk("POSIX conformance testing by UNIFIX\n");

        /*
         *      We count on the initial thread going ok
         *      Like idlers init is an unlocked kernel thread, which will
         *      make syscalls (and thus be locked).
         */
        smp_init() {
#ifndef CONFIG_SMP
#     ifdef CONFIG_X86_LOCAL_APIC
                APIC_init_uniprocessor();
#     else
                do { } while (0);
#     endif
#else
                /* Check Section 8.2. */
#endif
        }

        rest_init() {
                // init process, pid = 1
                kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
                unlock_kernel();
                current->need_resched = 1;
                // idle process, pid = 0
                cpu_idle();     // never return
        }
}
start_kernel() calls rest_init() to spawn an "init" process and become "idle" process itself.

7.2. init()

"Init" process:
///////////////////////////////////////////////////////////////////////////////
static int init(void * unused)
{
        lock_kernel();
        do_basic_setup();

        prepare_namespace();

        /*
         * Ok, we have completed the initial bootup, and
         * we're essentially up and running. Get rid of the
         * initmem segments and start the user-mode stuff..
         */
        free_initmem();
        unlock_kernel();

        if (open("/dev/console", O_RDWR, 0) < 0)        // stdin
                printk("Warning: unable to open an initial console.\n");

        (void) dup(0);                                  // stdout
        (void) dup(0);                                  // stderr

        /*
         * We try each of these until one succeeds.
         *
         * The Bourne shell can be used instead of init if we are
         * trying to recover a really broken machine.
         */

        if (execute_command)
                execve(execute_command,argv_init,envp_init);
        execve("/sbin/init",argv_init,envp_init);
        execve("/etc/init",argv_init,envp_init);
        execve("/bin/init",argv_init,envp_init);
        execve("/bin/sh",argv_init,envp_init);
        panic("No init found.  Try passing init= option to kernel.");
}
Refer to "man init" or SysVinit for further information on user-mode "init" process.

7.3. cpu_idle()

"Idle" process:
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
        /* endless idle loop with no priority at all */
        init_idle();
        current->nice = 20;
        current->counter = -100;

        while (1) {
                void (*idle)(void) = pm_idle;
                if (!idle)
                        idle = default_idle;
                while (!current->need_resched)
                        idle();
                schedule();
                check_pgt_cache();
        }
}

///////////////////////////////////////////////////////////////////////////////
void __init init_idle(void)
{
        struct schedule_data * sched_data;
        sched_data = &aligned_data[smp_processor_id()].schedule_data;

        if (current != &init_task && task_on_runqueue(current)) {
                printk("UGH! (%d:%d) was on the runqueue, removing.\n",
                        smp_processor_id(), current->pid);
                del_from_runqueue(current);
        }
        sched_data->curr = current;
        sched_data->last_schedule = get_cycles();
        clear_bit(current->processor, &wait_init_idle);
}

///////////////////////////////////////////////////////////////////////////////
void default_idle(void)
{
        if (current_cpu_data.hlt_works_ok && !hlt_counter) {
                __cli();
                if (!current->need_resched)
                        safe_halt();
                else
                        __sti();
        }
}

/* defined in linux/include/asm-i386/system.h */
#define __cli()                 __asm__ __volatile__("cli": : :"memory")
#define __sti()                 __asm__ __volatile__("sti": : :"memory")

/* used in the idle loop; sti takes one instruction cycle to complete */
#define safe_halt()             __asm__ __volatile__("sti; hlt": : :"memory")
CPU will resume code execution with the instruction following "hlt" on the return from an interrupt handler.

7.4. Reference


Space for these documents provided by Ben Spade in support of the Linux community.
Change happens - click for HOWTO index