//-----------------------------------------------------------------
//      mphello.s
//
//      This program employs Intel's MP Initialization Protocol
//      to awaken any auxiliary processors that may be present
//      and allows each processor to display its APIC Local-ID.
//
//       to assemble:  $ as mphello.s -o mphello.o
//       and to link:  $ ld mphello.o -T ldscript -o mphello.b
//       and install:  $ dd if=mphello.b of=/dev/sda4 seek=1
//
//      NOTE: This code begins executing with CS:IP = 1000:0002.
//
//      Syntax: GNU as, AT&T operand order, 16-bit real-mode code.
//
//      programmer: ALLAN CRUSE
//      written on: 31 OCT 2008
//-----------------------------------------------------------------


        # manifest constants
        .equ    APIC_BASE, 0xFEE00000           # base-address for APIC
        .equ    realCS, 0x1000                  # arena segment-address


        .section        .text
#------------------------------------------------------------------
        .word   0xABCD                          # our loader expects this
#------------------------------------------------------------------
# main -- entry point (offset 2, right after the loader signature).
#
# Saves the loader's SS:SP, switches to a private stack that grows
# downward from 'tos', runs the four phases of the demo, and then
# returns to the loader with a far return on the loader's stack.
#
main:   .code16                                 # for real-mode execution

        # the save-area is addressed through CS because DS has not
        # been set up yet at this point
        mov     %sp, %cs:ipltos+0               # preserve loader's SP
        mov     %ss, %cs:ipltos+2               # preserve loader's SS

        # SS is loaded immediately before SP so the pair is changed
        # back-to-back
        mov     %cs, %ax                        # address program arena
        mov     %ax, %ss                        #   using SS register
        lea     tos, %sp                        # establish local stack

        call    allow_4GB_addressing            # let FS reach the APIC
        call    display_APIC_LocalID            # BSP reports its own ID
        call    broadcast_AP_startup            # INIT + Startup IPIs
        call    delay_until_APs_halt            # wait for APs to finish

        lss     %cs:ipltos, %sp                 # recover loader's SS:SP
        lret                                    # return control to loader
#------------------------------------------------------------------
# Far-pointer image consumed by 'lss' above: offset word first,
# then segment word.
ipltos: .word   0                               # +0: loader's SP
        .word   0                               # +2: loader's SS
#------------------------------------------------------------------
# We must briefly enter protected-mode to arrange for APIC access
#
theGDT: .quad   0x0000000000000000              # required null-descriptor

        .equ    sel_FS, (.-theGDT)+0            # 'flat' segment-selector
        .quad   0x008F92000000FFFF              # 4GB (readable/writable)

        .equ    limGDT, (.-theGDT)-1            # our GDT's segment-limit
#------------------------------------------------------------------
# Register-image for GDTR: 16-bit limit followed by the linear base
# address of theGDT, i.e. (realCS << 4) + theGDT.  The high word of
# that base is derived from realCS instead of being hard-coded; this
# is exact as long as realCS is a multiple of 0x1000 and theGDT's
# offset fits in 16 bits (both hold for realCS = 0x1000).
regGDT: .word   limGDT, theGDT, realCS >> 12    # register-image for GDTR
#------------------------------------------------------------------
#------------------------------------------------------------------
        .code16                                 # all code below runs in real-mode

        # Local-APIC register offsets and ICR command values
        .equ    APIC_ID_REG, 0x020              # Local-APIC ID register
        .equ    APIC_ICR_LO, 0x300              # Interrupt Command Reg (low)
        .equ    ICR_BUSY_BIT, 12                # delivery-status bit in ICR
        .equ    IPI_INIT, 0x000C4500            # INIT, all-except-self
        .equ    IPI_STARTUP, 0x000C4600         # Startup, all-except-self
#------------------------------------------------------------------
allow_4GB_addressing:
#
# Here we setup register FS, so it can access the APIC registers.
# The CPU is switched into protected-mode just long enough to load
# FS with the 'flat' 4GB descriptor, then switched back; afterward
# FS is reloaded with zero so that its base-address is zero.
#
# In:    nothing (regGDT/theGDT are addressed through CS)
# Out:   FS = 0, usable with 32-bit offsets such as APIC_BASE
# Clobb: EAX (flags are preserved by pushf/popf)
#
        pushf                                   # preserve current flags
        cli                                     # and disable interrupts

        mov     %cr0, %eax                      # get CPU's register CR0
        bts     $0, %eax                        # turn on PE-bit's image
        mov     %eax, %cr0                      # enter 'protected-mode'

        lgdt    %cs:regGDT                      # setup GDT for this CPU

        mov     $sel_FS, %ax                    # flat-segment's selector
        mov     %ax, %fs                        # raises FS segment-limit

        mov     %cr0, %eax                      # get CPU's register CR0
        btr     $0, %eax                        # reset image for PE-bit
        mov     %eax, %cr0                      # go back to 'real-mode'

        xor     %ax, %ax                        # load base-address zero
        mov     %ax, %fs                        #  for our 'flat' segment

        popf                                    # restore previous flags
        ret
#------------------------------------------------------------------
# Message text and shared variables.  The message buffer and the
# counters below are shared by every processor; the buffer is only
# touched while 'mutex' is held.
#
# NOTE(review): the 'pid' buffer is a single blank in the text that
# was reviewed; if padding blanks were lost, restore them here.
#
msg:    .ascii  "Hello from processor "         # message from processor
pid:    .ascii  " "                             # buffer for CPU LocalID
        .ascii  "CR0="                          # legend for CR0 display
msw:    .ascii  "xxxxxxxx \n\r"                 # buffer for CR0 content
len:    .short  . - msg                         # length of message-text
att:    .byte   0x0B                            # message attribute-byte

        .balign 2                               # keep 'lock'ed words aligned
mutex:  .word   1                               # mutual-exclusion flag
                                                #  (bit 0 set = available)
n_APs:  .word   0                               # count of awakened APs
n_fin:  .word   0                               # count of finished APs
newSS:  .word   0x2000                          # next AP stack segment
hex:    .ascii  "0123456789ABCDEF"              # table of hex numerals
#------------------------------------------------------------------
eax2hex:
#
# Converts the value in EAX to an 8-character hexadecimal string.
#
# In:    EAX = value, DS:DI = destination buffer (8 bytes);
#        DS must also address the 'hex' table
# Out:   buffer filled, most-significant nybble first
# Clobb: flags only (all registers preserved by pushal/popal)
#
        pushal

        mov     $8, %cx                         # number of nybbles
nxnyb:  rol     $4, %eax                        # next nybble into AL
        mov     %al, %bl                        # copy nybble into BL
        and     $0x0F, %bx                      # and convert to word
        mov     hex(%bx), %dl                   # lookup ascii numeral
        mov     %dl, (%di)                      # put numeral in buffer
        inc     %di                             # advance buffer address
        loop    nxnyb                           # again for next nybble

        popal
        ret
#------------------------------------------------------------------
#------------------------------------------------------------------
display_APIC_LocalID:
#
# This procedure will be executed by each of the processors, but
# because its message-buffer requires mutually exclusive access,
# and because ROM-BIOS int-0x10 routines are not 'reentrant', we
# must insure that only one CPU at a time can execute this code,
# so we employ a 'spinlock' (and utilize the x86 'lock' prefix).
#
# In:    FS = flat segment (see allow_4GB_addressing)
# Out:   one message line written via the BIOS; DS = ES = CS
# Clobb: EAX, EBX, CX, DX, BP, DI, flags (plus whatever the BIOS
#        video services modify)
#
        mov     %cs, %ax                        # address program arena
        mov     %ax, %ds                        #   with DS register
        mov     %ax, %es                        #   also ES register

        # acquire the spinlock -- so only one CPU at a time can
        # modify the message-text and call BIOS video functions.
        # The plain test keeps waiting CPUs from issuing locked
        # bus cycles until the mutex looks free.
spin:   btw     $0, mutex                       # mutex is available?
        jnc     spin                            # no, wait till it is
        lock    btrw    $0, mutex               # try to grab the mutex
        jnc     spin                            # spin until successful

        # put this CPU's Local-APIC ID-register in message-text;
        # the hex table is used so IDs 10..15 show as 'A'..'F'
        mov     $APIC_BASE, %ebx                # APIC address in EBX
        mov     %fs:APIC_ID_REG(%ebx), %eax     # ID-register's value
        shr     $24, %eax                       # shift ID down into AL
        mov     %ax, %bx                        # use it as table index
        and     $0x0F, %bx                      # isolate lowest nybble
        mov     hex(%bx), %al                   # convert to a numeral
        mov     %al, pid                        # and put into message

        # format contents of this CPU's register CR0 for display
        mov     %cr0, %eax                      # load value from CR0
        lea     msw, %di                        # point DS:DI to buffer
        call    eax2hex                         # convert value to hex

        # invoke ROM-BIOS functions to write message onto screen;
        # the page and cursor results of the first two calls are
        # left in registers for the write_string call
        mov     $0x0F, %ah                      # get_display_page
        int     $0x10                           # invoke BIOS service

        mov     $0x03, %ah                      # get_cursor_location
        int     $0x10                           # invoke BIOS service

        lea     msg, %bp                        # point ES:BP to string
        mov     len, %cx                        # string's length in CX
        mov     att, %bl                        # color-attribute in BL
        mov     $0x1301, %ax                    # write_string
        int     $0x10                           # invoke BIOS service

        # we have now finished with this 'non-reentrant' code
        btsw    $0, mutex                       # release the spinlock

        ret
#------------------------------------------------------------------
#------------------------------------------------------------------
delay_EAX_microseconds:
#
# This helper-function will implement the timed delays which are
# specified in Intel's 'Multiprocessor Initialization Protocol',
# where the delay-duration (in microseconds) is in register EAX.
#
# In:    EAX = delay in microseconds; the computed count is
#        written as two bytes, so it must fit in 16 bits
#        (both callers here, 10000 and 200, do)
# Out:   returns after 8254 channel-2 reports terminal count
# Clobb: flags only (all registers preserved by pushal/popal)
#
        pushal

        mov     %eax, %ecx                      # copy microseconds count

        # enable the 8254 Channel-2 counter
        in      $0x61, %al                      # get PORT_B settings
        and     $0x0D, %al                      # turn PC speaker off
        or      $0x01, %al                      # turn on Gate2 input
        out     %al, $0x61                      # output new settings

        # program channel-2 for a single countdown
        mov     $0xB0, %al                      # chan2, LSB/MSB, mode 0
        out     %al, $0x43                      # output command to PIT

        # compute value for channel-2 latch-register:
        #   count = 1193182 * microseconds / 1000000
        mov     $1193182, %eax                  # input-pulses-per-second
        mul     %ecx                            # * number of microseconds
        mov     $1000000, %ecx                  # microseconds-per-second
        div     %ecx                            # division by doubleword

        # write latch-register value to channel-2 (low byte first)
        out     %al, $0x42
        mov     %ah, %al
        out     %al, $0x42

        # wait for channel-2 countdown to conclude
nxpoll: in      $0x61, %al                      # get PORT_B status
        test    $0x20, %al                      # channel-2 output high?
        jz      nxpoll                          # no, keep polling

        # disable the 8254 Channel-2 counter
        in      $0x61, %al                      # get PORT_B settings
        and     $0x0C, %al                      # turn off channel-2
        out     %al, $0x61                      # output new settings

        popal
        ret
#------------------------------------------------------------------
delay_until_APs_halt:
#
# Here the BootStrap Processor waits for all the APs to finish.
# It sleeps between checks and compares the two shared counters.
# DS must address the program arena (display_APIC_LocalID leaves
# it that way).  Interrupts are left enabled on return.
#
# Clobb: AX, flags
#
        sti                                     # allow timer interrupts
check:  hlt                                     # halt until interrupted
        mov     n_APs, %ax                      # number of awakened APs
        sub     n_fin, %ax                      # equals number finished?
        jnz     check                           # no, check again later
        ret
#------------------------------------------------------------------
#------------------------------------------------------------------
broadcast_AP_startup:
#
# This procedure is executed only by the BootStrap Processor, to
# awaken the Auxiliary Processors so that they each can display
# their Local-APIC ID-number (and their CR0 register's value, so
# we can verify that the cache-related bits are setup properly).
# We use code here which follows the MP Initialization Protocol.
#
# The Interrupt Command Register is memory-mapped I/O, so it is
# always read and written as an aligned doubleword with 'mov' and
# tested in a register, never with a memory-operand bit-test.
#
# Out:   FS = 0
# Clobb: EAX, EBX, ECX, EDX, flags
#
        # point FS:EBX to the Local-APIC's memory-mapped page
        xor     %ax, %ax                        # address segment zero
        mov     %ax, %fs                        #   with FS register
        mov     $APIC_BASE, %ebx                # APIC address in EBX

        # compute the page-number (where each AP should start)
        mov     $realCS, %edx                   # arena segment-address
        shl     $4, %edx                        # multiplied by sixteen
        add     $tos, %edx                      # plus entry's offset
        shr     $12, %edx                       # divided by page-size
        and     $0xFF, %edx                     # must be in bottom 1MB

        # issue an 'INIT' Inter-Processor Interrupt command
        mov     $IPI_INIT, %eax                 # broadcast INIT-IPI
        mov     %eax, %fs:APIC_ICR_LO(%ebx)     # to all-except-self

        # do ten-millisecond delay, enough time for APs to awaken
        mov     $10000, %eax                    # ten-thousand microseconds
        call    delay_EAX_microseconds          # execute programmed delay

        # wait for indication of the command's completion
spin1:  mov     %fs:APIC_ICR_LO(%ebx), %eax     # 32-bit read of the ICR
        bt      $ICR_BUSY_BIT, %eax             # command-in-progress?
        jc      spin1                           # yes, spin until done

        #--------------------------------------------------------
        # now we complete the Intel 'MP Initialization Protocol'
        #--------------------------------------------------------
        mov     $2, %ecx                        # protocol's repetitions
nxIPI:
        # issue a 'Startup' Inter-Processor Interrupt command
        mov     $IPI_STARTUP, %eax              # issue 'Startup-IPI'
        mov     %dl, %al                        # page is the vector
        mov     %eax, %fs:APIC_ICR_LO(%ebx)     # to all-except-self

        # delay for 200 microseconds
        mov     $200, %eax                      # number of microseconds
        call    delay_EAX_microseconds          # for a programmed delay

        # wait for indication of the command's completion
spin2:  mov     %fs:APIC_ICR_LO(%ebx), %eax     # 32-bit read of the ICR
        bt      $ICR_BUSY_BIT, %eax             # command-in-progress?
        jc      spin2                           # yes, spin until done

        # repeat this 'Startup-IPI' step twice (per the protocol)
        loop    nxIPI                           # again for MP protocol

        ret
#------------------------------------------------------------------
#------------------------------------------------------------------
initAP:
#
# This procedure will be executed by each Application Processor as
# it is awakened by the BootStrap Processor sending Startup-IPI's.
# In order that each processor can call subroutines, it requires a
# private stack-area, which we setup sequentially using the 'xadd'
# instruction (to guarantee that stack-areas are non-overlapping).
# Because all the APs are awakened by one broadcast, they run this
# code concurrently, so the 'xadd' carries a 'lock' prefix: without
# it two processors could fetch the same segment-address.
# But until its stack is ready, this CPU cannot handle interrupts.
#
# Never returns: the processor halts when it has finished.
#
        cli                                     # disable interrupts

        mov     %cs, %ax                        # address program arena
        mov     %ax, %ds                        #   using DS register
        mov     %ax, %es                        #   and ES register

        # increment the count of processors that have awakened
        lock    incw    n_APs                   # 'atomic' AP-count update

        # setup an exclusive stack-region for this processor:
        # AX receives the old newSS, and newSS advances by 0x1000
        # paragraphs (64KB) for the next processor
        mov     $0x1000, %ax                    # paragraphs in segment
        lock    xaddw   %ax, newSS              # 'atomic' xchg-and-add
        mov     %ax, %ss                        # segment-address to SS
        xor     %esp, %esp                      # top-of-stack into ESP

        # call subroutines to display this processor's Local-ID
        call    allow_4GB_addressing            # adjust FS seg-limit
        call    display_APIC_LocalID            # display this CPU's ID

        # increment the count of processors that have finished
        lock    incw    n_fin                   # 'atomic' counter update

        # now put this processor to sleep
sleep:  cli                                     # do not awaken this CPU
        hlt                                     # 'fetch-execute' ceases
        jmp     sleep                           # just-in-case of an NMI
#------------------------------------------------------------------
        .org    4096                            # insures page-alignment
# 'tos' serves two purposes: the BSP's stack grows downward from
# here, and each AP starts executing at this page-aligned address.
tos:    ljmp    $realCS, $initAP                # initialize awakened AP
#------------------------------------------------------------------
        .end                                    # nothing else to assemble