For now, I am using the floppy model: the final executable is treated as a 1.44 MB floppy image in which the bootloader, the kernel loader and the kernel itself are simply appended to one another. The loader I am about to provide also has the added benefit of loading the file one sector at a time, no matter how large it is, so it can load a kernel of arbitrary size.
It sets up a generic page table allowing access to the first 16 MB of memory and loads 16-bit, 32-bit and 64-bit descriptors into the GDT, using 0x28 as the final code segment selector when it launches into 64-bit mode. It also enables the SSE registers, since a lot of the code generated by my C++ compiler of choice (clang) makes heavy use of them. Why not? Clearing 128 bits of memory or more in one instruction beats clearing 64. An able programmer should be able to adapt this for use on a hard drive and make it a little more dynamic. This file is intended to be loaded by the MBR and executed at 0x6000.
%include "loaderconstants.inc"

;; ---------------------------------------------------------------------------
;; Second-stage kernel loader (NASM syntax, flat binary, loaded at 0x6000).
;;
;; Flow:
;;   1. Read the first sector of the ELF64 kernel image and validate it.
;;   2. Enable A20 and enter unreal mode (FS = flat 4 GiB data segment).
;;   3. Copy the section header table to SHT_BUF, then copy every section
;;      that has a non-zero address, file offset and size to its address.
;;   4. Build identity-mapped page tables for the first 16 MiB (2 MiB pages).
;;   5. Store the real-mode IDTR and the INT 15h/E820 memory map.
;;   6. Switch 16 -> 32 -> 64 bit, enable SSE and call the kernel entry.
;;
;; Expects kernelLBAAddr and loaderNumSects from loaderconstants.inc and the
;; boot drive number in DL on entry.
;; ---------------------------------------------------------------------------

[ORG 0x6000]
[BITS 16]

;; ---- Fixed low-memory layout -------------------------------------------
SECTOR_SIZE         EQU 512
SECTOR_BUF          EQU 0x7000          ;; every sector read lands here
SHT_BUF             EQU 0x8000          ;; copy of the ELF section header table
PML4_ADDR           EQU 0x10000
PDPT_ADDR           EQU 0x11000
PD_ADDR             EQU 0x12000
PAGE_TABLES_BYTES   EQU 3 * 0x1000      ;; PML4 + PDPT + PD
E820_COUNT          EQU 0x7010          ;; word: number of stored map entries
E820_ENTRIES        EQU 0x7012          ;; 24-byte entries follow the count
SMAP_MAGIC          EQU 0x534D4150      ;; "SMAP"

;; ---- ELF64 header / section header offsets -----------------------------
ELF_MAGIC           EQU 0x464C457F      ;; 0x7F,"ELF" read as a little-endian dword
ELF_CLASS64_LSB     EQU 0x0102          ;; EI_CLASS = 2 (64 bit), EI_DATA = 1 (LSB)
ELF_EI_CLASS        EQU 0x04
ELF_E_ENTRY         EQU 0x18
ELF_E_SHOFF         EQU 0x28
ELF_E_SHENTSIZE     EQU 0x3A
ELF_E_SHNUM         EQU 0x3C
SHDR_ADDR           EQU 0x10
SHDR_OFFSET         EQU 0x18
SHDR_FILESZ         EQU 0x20
SHDR_ENTRY_BYTES    EQU 0x40            ;; the section walk assumes 64-byte entries

;; ---- GDT selectors (see the GDT at the end of the file) ------------------
SEL_CODE32          EQU 0x18
SEL_DATA32          EQU 0x20
SEL_CODE64          EQU 0x28
SEL_DATA64          EQU 0x30

EFER_MSR            EQU 0xC0000080
EFER_LME            EQU 1 << 8          ;; long mode enable

        ;; ORG 0x6000 plus the absolute buffer addresses only work with zero
        ;; segment bases, and LODSB must walk forward: make both explicit
        ;; instead of trusting whatever the MBR left behind.
        XOR     AX, AX
        MOV     DS, AX
        MOV     ES, AX
        CLD

        ;; Store boot drive
        MOV     [bootDrive], DL

        ;; Read first sector of ELF image and get needed data from it
        MOV     EAX, kernelLBAAddr
        CALL    ReadSector

        ;; First lets make sure its actually an ELF
        CMP     DWORD [SECTOR_BUF], ELF_MAGIC
        JNZ     badELF
        ;; Make sure its 64 bit little endian
        CMP     WORD [SECTOR_BUF + ELF_EI_CLASS], ELF_CLASS64_LSB
        JNZ     badELF

        ;; Enter unreal mode, must be done before using the copyData function.
        ;; Lets assume we have a computer built after 1997 (INT 15h AX=2401h
        ;; enables the A20 gate).
        MOV     AX, 0x2401
        INT     0x15
        CLI
        LGDT    [GDTR]
        MOV     EAX, CR0
        OR      AL, 1                   ;; PE on just long enough to load FS
        MOV     CR0, EAX
        MOV     BX, SEL_DATA32
        MOV     FS, BX                  ;; FS picks up the flat 4 GiB limit
        AND     AL, 0xFE                ;; PE off again
        MOV     CR0, EAX
        STI

        ;; Now we should be in 16 bit REAL mode with access to the first 4G of
        ;; RAM through FS
        MOV     BX, [SECTOR_BUF + ELF_E_SHNUM]
        TEST    BX, BX                  ;; zero sections would make the section
        JZ      badELF                  ;; loop below wrap around 65536 times
        MOV     EAX, [SECTOR_BUF + ELF_E_ENTRY]
        MOV     [krnlEntry], EAX        ;; low dword only; high dword stays 0

        ;; GET SHT Address
        ;; We use the Section Header instead of the PHT, so that we can have an
        ;; extra section in a separate location, ie an x86 Real mode interrupt
        ;; handler at 0x5000 while the kernel itself resides at 1MB
        MOV     ESI, [SECTOR_BUF + ELF_E_SHOFF]
        ;; sizeof(SHT) = e_shentsize * e_shnum (only the low 16 bits are kept)
        MOV     AX, [SECTOR_BUF + ELF_E_SHENTSIZE]
        MUL     BX
        XOR     ECX, ECX
        MOV     CX, AX
        MOV     EDI, SHT_BUF
        ;; Copy SHT to SHT_BUF using our data read functions
        CALL    copyData

        ;; Now that we have our sections in memory, lets go through them one by
        ;; one and load them.  CX = sections left, BX = current section header.
        ;; NOTE(review): sh_type is never inspected, so a section that has an
        ;; address, offset and size is copied from the file whatever its type.
        MOV     CX, BX                  ;; copyData preserved BX = e_shnum
        MOV     BX, SHT_BUF
elfLoop:
        PUSH    CX
        MOV     EDI, [BX + SHDR_ADDR]   ;; Destination address
        MOV     ESI, [BX + SHDR_OFFSET] ;; Offset into file
        MOV     ECX, [BX + SHDR_FILESZ] ;; Size
        ;; None of these can be zero
        TEST    EDI, EDI
        JZ      .elSkipSection
        TEST    ESI, ESI
        JZ      .elSkipSection
        TEST    ECX, ECX
        JZ      .elSkipSection
        ;; The address of the first loaded section becomes the initial stack top
        CMP     DWORD [stackStart], 0
        JNZ     .itsLoaded
        MOV     [stackStart], EDI
.itsLoaded:
        CALL    copyData
        ;; copyData leaves EDI just past the data it wrote
        MOV     [mallocStart], EDI
.elSkipSection:
        ADD     BX, SHDR_ENTRY_BYTES
        POP     CX
        DEC     CX
        JNZ     elfLoop

        ;; Create page tables, assume 2MB pages are okay.
        ;; Identity map the first 16MB. We can set the rest up inside the
        ;; kernel, for now we know that we have at LEAST 16M.
        ;;
        ;; Clear all three tables first: only a handful of low dwords are
        ;; written below, so without this every other entry (and the upper half
        ;; of the entries we do write) would hold whatever was in memory.
        MOV     EDI, PML4_ADDR
        MOV     ECX, PAGE_TABLES_BYTES / 4
        XOR     EAX, EAX
.zeroTables:
        MOV     [FS:EDI], EAX
        ADD     EDI, 4
        DEC     ECX
        JNZ     .zeroTables

        MOV     EDI, PML4_ADDR
        MOV     DWORD [FS:EDI], PDPT_ADDR | 3   ;; present | writable
        MOV     EDI, PDPT_ADDR
        MOV     DWORD [FS:EDI], PD_ADDR | 3     ;; present | writable
        ;; Eight 2 MiB pages: 0x000083, 0x200083, ... 0xe00083
        ;; (0x83 = present | writable | page size)
        MOV     EDI, PD_ADDR
        MOV     EAX, 0x83
        MOV     CX, 8
.mapPage:
        MOV     [FS:EDI], EAX
        ADD     EDI, 8
        ADD     EAX, 0x200000
        DEC     CX
        JNZ     .mapPage

        ;; Get int 15 memory map and store the real mode IDTR in preparation
        ;; for bios calls from the kernel
        SIDT    [SECTOR_BUF]

        ;; Use the INT 0x15, eax = 0xE820 BIOS function to get a memory map.
        ;; inputs:  es:di -> destination buffer for 24 byte entries
        ;; outputs: bp = entry count, trashes all registers except esi
        MOV     DI, E820_ENTRIES
        XOR     EBX, EBX                ;; ebx must be 0 to start
        XOR     BP, BP                  ;; keep an entry count in bp
        MOV     EDX, SMAP_MAGIC         ;; Place "SMAP" into edx
        MOV     EAX, 0xE820
        MOV     DWORD [ES:DI + 20], 1   ;; force a valid ACPI 3.X entry
        MOV     ECX, 24                 ;; ask for 24 bytes
        INT     0x15
        JC      SHORT .failed           ;; carry on first call: unsupported function
        MOV     EDX, SMAP_MAGIC         ;; Some BIOSes apparently trash this register?
        CMP     EAX, EDX                ;; on success, eax must have been reset to "SMAP"
        JNE     SHORT .failed
        TEST    EBX, EBX                ;; ebx = 0 implies list is only 1 entry long (worthless)
        JE      SHORT .failed
        JMP     SHORT .jmpin
.e820lp:
        MOV     EAX, 0xE820             ;; eax, ecx get trashed on every int 0x15 call
        MOV     DWORD [ES:DI + 20], 1   ;; force a valid ACPI 3.X entry
        MOV     ECX, 24                 ;; ask for 24 bytes again
        INT     0x15
        JC      SHORT .e820f            ;; carry set means "end of list already reached"
        MOV     EDX, SMAP_MAGIC         ;; repair potentially trashed register
.jmpin:
        JCXZ    .skipent                ;; skip any 0 length entries
        CMP     CL, 20                  ;; got a 24 byte ACPI 3.X response?
        JBE     SHORT .notext
        TEST    BYTE [ES:DI + 20], 1    ;; if so: is the "ignore this data" bit clear?
        JE      SHORT .skipent
.notext:
        MOV     ECX, [ES:DI + 8]        ;; get lower dword of memory region length
        OR      ECX, [ES:DI + 12]       ;; "or" it with upper dword to test for zero
        JZ      .skipent                ;; if length qword is 0, skip entry
        INC     BP                      ;; got a good entry: ++count, move to next storage spot
        ADD     DI, 24
.skipent:
        TEST    EBX, EBX                ;; if ebx resets to 0, list is complete
        JNE     SHORT .e820lp
.e820f:
        MOV     [E820_COUNT], BP        ;; store the entry count
        JMP     SHORT LetsGo
.failed:
        ;; "function unsupported": publish an empty map rather than leaving the
        ;; stale sector-buffer bytes at E820_COUNT for the kernel to read.
        MOV     WORD [E820_COUNT], 0

LetsGo:
        ;; Lets jump from 16 bit to 32 to 64 then to the kernel
        CLI                             ;; Goodbye interrupts until we are in C++ code
        MOV     EAX, CR0
        OR      AL, 1
        MOV     CR0, EAX
        JMP     SEL_CODE32:mode32

[BITS 32]
mode32:
        MOV     AX, SEL_DATA32
        MOV     DS, AX
        MOV     DX, 0x3F2               ;; Turn the floppy motor off, its annoying!
        MOV     AL, 0xC
        OUT     DX, AL

        ;; Set PAE and PGE bit
        MOV     EAX, 10100000b
        MOV     CR4, EAX
        MOV     EDI, PML4_ADDR
        MOV     CR3, EDI
        MOV     ECX, EFER_MSR           ;; Read from the EFER MSR.
        RDMSR
        ;; Set the LME bit only.  Bit 10 (LMA) is a status bit the CPU raises
        ;; itself once paging is switched on, so it is not written here.
        OR      EAX, EFER_LME
        WRMSR
        MOV     EBX, CR0                ;; Activate long mode -
        OR      EBX, 0x80000001         ;; - by enabling paging and protection simultaneously.
        MOV     CR0, EBX

        ;; Now lets set up and activate all of that fancy math coprocessor
        ;; support: SSE instructions
        MOV     EAX, CR0
        AND     AX, 0xFFFB              ;; clear CR0 bit 2 (EM)
        OR      AX, 2                   ;; set CR0 bit 1 (MP)
        MOV     CR0, EAX
        MOV     EAX, CR4
        OR      AX, 3 << 9              ;; set CR4 bits 9 and 10 (OSFXSR, OSXMMEXCPT)
        MOV     CR4, EAX
        JMP     SEL_CODE64:longMode

[BITS 64]
longMode:
        MOV     AX, SEL_DATA64
        MOV     DS, AX
        MOV     ES, AX
        MOV     FS, AX
        MOV     GS, AX
        MOV     SS, AX
        ;; 32-bit loads zero the upper halves of RSP / RDI
        MOV     ESP, [stackStart]       ;; stack grows down from the first section
        MOV     RBP, RSP
        MOV     EDI, [mallocStart]      ;; handed to the kernel in RDI
        MOV     RAX, [krnlEntry]
        CALL    RAX
        ;; The kernel returned: park the CPU for good.
.hang:
        CLI
        HLT
        JMP     .hang

[BITS 16]

;; ---------------------------------------------------------------------------
;; Functions
;; ---------------------------------------------------------------------------

;; copyData - copy part of the kernel file into memory, one sector at a time.
;; In:    ESI = byte offset into the kernel file
;;        EDI = destination address (accessed through FS)
;;        ECX = number of bytes (0 copies nothing)
;; Out:   EDI = one past the last byte written, ECX = 0
;; Saves: EAX, EBX, EDX, ESI
;; Needs unreal mode (FS flat); overwrites the sector buffer at SECTOR_BUF.
copyData:
        TEST    ECX, ECX
        JZ      .cdEmpty                ;; a zero count would wrap the byte loop
        PUSH    EBX
        PUSH    ESI
        PUSH    EAX
        PUSH    EDX
        PUSH    ECX
        ;; First get starting sector: EAX = offset / 512, EDX = offset % 512
        MOV     EAX, ESI
        XOR     EDX, EDX
        MOV     EBX, SECTOR_SIZE
        DIV     EBX
        ADD     EAX, kernelLBAAddr
        CALL    ReadSector
        ;; Copy from first sector
        MOV     ECX, SECTOR_SIZE
        SUB     ECX, EDX                ;; ecx has rest of sector count
        POP     EBX                     ;; actual requested bytes in ebx
        CMP     EBX, ECX                ;; Is it less? Can it all really fit in one sector?
        JC      .onlyOneNeeded          ;; Yup
        SUB     EBX, ECX
        PUSH    EBX                     ;; bytes still owed after this sector
        JMP     .doCopy
.onlyOneNeeded:
        XCHG    EBX, ECX                ;; copy just the requested bytes
        PUSH    DWORD 0                 ;; nothing left afterwards
.doCopy:
        MOV     ESI, EDX
        ADD     ESI, SECTOR_BUF
        CALL    copyBytes
        ;; Ok, how much is left?  (remaining byte count is on top of the stack)
.cdSectorLoop:
        POP     ECX
        TEST    ECX, ECX
        JZ      .cdDone                 ;; No more data?
        CMP     ECX, SECTOR_SIZE
        JC      .cdLastSector           ;; Less than one sector of data left
        ;; Read a whole sector and transfer up to destination
        SUB     ECX, SECTOR_SIZE
        PUSH    ECX
        INC     EAX
        CALL    ReadSector
        MOV     ECX, SECTOR_SIZE
        MOV     ESI, SECTOR_BUF
        CALL    copyBytes
        JMP     .cdSectorLoop
.cdLastSector:
        INC     EAX
        CALL    ReadSector
        MOV     ESI, SECTOR_BUF
        CALL    copyBytes
.cdDone:
        POP     EDX
        POP     EAX
        POP     ESI
        POP     EBX
.cdEmpty:
        RET

;; copyBytes - copy ECX bytes from FS:ESI to FS:EDI.
;; We have to do this this way since 16 bit rep movsb will only do 64k of ram,
;; this can access the first 4G.
;; Out:   ESI and EDI advanced past the copied bytes, ECX = 0
;; Saves: AX
copyBytes:
        TEST    ECX, ECX
        JZ      .cbDone
        PUSH    AX
.cbLoop:
        MOV     AL, [FS:ESI]
        MOV     [FS:EDI], AL
        INC     ESI
        INC     EDI
        DEC     ECX
        JNZ     .cbLoop
        POP     AX
.cbDone:
        RET

;; ReadSector - read the sector with the LBA address in EAX into SECTOR_BUF.
;; Drives below 0x80 are read through CHS (INT 13h AH=02h), anything else
;; through the extended read packet (INT 13h AH=42h).
;; Saves: every general register (PUSHAD/POPAD)
;; Does not return on a BIOS error: control goes to readError.
;; NOTE(review): a failed read is not retried.
ReadSector:
        PUSHAD
        MOV     [currSector], EAX
        CALL    incrementSpinner
        MOV     DL, [bootDrive]
        CMP     DL, 0x80
        JNC     .readHDD
        ;; We dont need dword support for a floppy
        CALL    LBAtoCHS
        MOV     DL, [bootDrive]         ;; LBAtoCHS used DL as scratch
        MOV     AX, 0x201               ;; AH=02h read, AL=1 sector
        MOV     BX, SECTOR_BUF
        INT     0x13
        JMP     .checkResult            ;; JMP leaves the carry flag alone
.readHDD:
        MOV     DWORD [HDDReadPacket.sector], EAX
        MOV     AX, 0x4200
        MOV     SI, HDDReadPacket
        INT     0x13
.checkResult:
        JC      readError
        POPAD
        RET

;; LBAtoCHS - converts LBA to CHS address for a 1.44 floppy.
;; In:    AX = LBA sector
;; Out:   CL = sector (1-based), DH = head, CH = cylinder; DL and AX are scratch
LBAtoCHS:
        XOR     CX, CX
        XOR     DX, DX
        DIV     WORD [flpSecTrk]        ;; AX = LBA / sectors per track, DX = remainder
        INC     DX
        MOV     CL, DL
        XOR     DX, DX
        DIV     WORD [flpHds]           ;; AX = cylinder, DX = head
        MOV     DH, DL
        MOV     CH, AL
        RET

;; incrementSpinner - advances the spinner so that the user can see something
;; is happening.  Prints the 3-byte txtSpinner entry at index txtSpPos + 1,
;; then steps txtSpPos through 0..4 and back to 0.
;; Saves: SI, CX
incrementSpinner:
        PUSH    SI
        PUSH    CX
        XOR     CX, CX
        MOV     CL, [txtSpPos]
        INC     CL
        MOV     SI, txtSpinner
.seek:
        ADD     SI, 3                   ;; entries are 3 bytes apart
        DEC     CX
        JNZ     .seek
        MOV     CL, [txtSpPos]
        CALL    printString
        INC     CL
        CMP     CL, 4
        JBE     .store
        XOR     CL, CL
.store:
        MOV     [txtSpPos], CL
        POP     CX
        POP     SI
        RET

;; printString - print the zero-terminated string at DS:SI with the BIOS
;; teletype call (INT 10h AH=0Eh).
;; Out:   SI = one past the terminator
;; Saves: AX, BX, CX
printString:
        PUSH    AX
        PUSH    BX
        PUSH    CX
        MOV     AH, 0xe
        XOR     BX, BX
        XOR     CX, CX
.printStringLoop:
        LODSB
        TEST    AL, AL
        JZ      .printStringExit
        INT     0x10
        JMP     .printStringLoop
.printStringExit:
        POP     CX
        POP     BX
        POP     AX
        RET

;; ---------------------------------------------------------------------------
;; Error functions (neither returns)
;; ---------------------------------------------------------------------------
readError:
        MOV     SI, readErrorStr
        CALL    printString
.hang:
        CLI
        HLT
        JMP     .hang                   ;; never run on into the string below
readErrorStr    db 13,10,13,10,"Disk Read error",0

badELF:
        MOV     SI, badELFStr
        CALL    printString
.hang:
        CLI
        HLT
        JMP     .hang
badELFStr       db 13,10,13,10,"Corrupted ELF Image!",0

;; ---------------------------------------------------------------------------
;; Data
;; ---------------------------------------------------------------------------
;; Spinner frames, 3 bytes each; entry 0 is never printed.  The final "."
;; entry has no backspace.
txtSpinner      db 0,0,0,"/",8,0,"-",8,0,"\",8,0,"|",8,0,".",0
txtSpPos        db 0
bootDrive       db 0
currSector      dd 0
flpSecTrk       dw 18
flpHds          dw 2
krnlEntry       dq 0
mallocStart     dd 0
stackStart      dd 0

HDDReadPacket:
        ;; Some of these values are static
        db      0x10                    ;; packet size
        db      0
        dw      1                       ;; sectors to read
        dw      SECTOR_BUF              ;; buffer offset
        dw      0                       ;; buffer segment
.sector dq      0                       ;; starting LBA (ReadSector fills the low dword)

ALIGN 8
GDT:
        dq      0
        ;; 16 Bit
        dd      0x0000ffff              ;; Code 0x8
        dd      0x00009c00
        dd      0x0000ffff              ;; Data 0x10
        dd      0x00009200
        ;; 32 Bit Segments
        dd      0x0000ffff              ;; Code 0x18
        dd      0x00cf9c00
        dd      0x0000ffff              ;; Data 0x20
        dd      0x00cf9200
        ;; 64 bit
        dq      0x002f98000000ffff      ;; Code 0x28
        dq      0x002f92000000ffff      ;; Data 0x30
GDTR:
        dw      (GDTR-GDT)-1
        dd      GDT

;; Pad the image to a whole number of sectors.
;; NOTE(review): the fill byte is decimal 90, not 0x90 - confirm which was meant.
TIMES (512 * (loaderNumSects))-($-$$) DB 90