Make your own OS in 30 days – day5: Characters and Mouse Cursor

VRAM address, x/y size were hard coded in Day 4 example.

vram = (char *) 0xa0000;
xsize = 320;
ysize = 200;

Day 5 reads them from the boot information area at 0x0ff0 (filled in by asmhead.nas; SCRNX is at 0x0ff4, SCRNY at 0x0ff6 and VRAM at 0x0ff8) via the BOOTINFO struct as below.

CYLS    EQU             0x0ff0 
LEDS    EQU             0x0ff1
VMODE   EQU             0x0ff2 
SCRNX   EQU             0x0ff4 
SCRNY   EQU             0x0ff6 
VRAM    EQU             0x0ff8 
MOV		DWORD [VRAM],0x000a0000


struct BOOTINFO {
 char cyls, leds, vmode, reserve;
 short scrnx, scrny;
 char *vram;

void HariMain(void)
 struct BOOTINFO *binfo = (struct BOOTINFO *) 0x0ff0;                                                                                              
 init_screen8(binfo->vram, binfo->scrnx, binfo->scrny);

Then it creates ASCII bitmap fonts and a mouse cursor from obj file built from text files like this.

char 0x31

And it is used like this. Finally we can print variable values on the screen for debugging. Since we don’t have debug output or a fancier port to get debug info from, this is useful while developing the OS.

void putfont8(char *vram, int xsize, int x, int y, char c, char *font)
	int i;
	char *p, d /* data */;
	for (i = 0; i < 16; i++) {
		p = vram + (y + i) * xsize + x;
		d = font[i];
		if ((d & 0x80) != 0) { p[0] = c; }
		if ((d & 0x40) != 0) { p[1] = c; }
		if ((d & 0x20) != 0) { p[2] = c; }
		if ((d & 0x10) != 0) { p[3] = c; }
		if ((d & 0x08) != 0) { p[4] = c; }
		if ((d & 0x04) != 0) { p[5] = c; }
		if ((d & 0x02) != 0) { p[6] = c; }
		if ((d & 0x01) != 0) { p[7] = c; }

void putfonts8_asc(char *vram, int xsize, int x, int y, char c, unsigned char *s)
	extern char hankaku[4096]; // <-- stored in obj file as _hankaku
	for (; *s != 0x00; s++) {
		putfont8(vram, xsize, x, y, c, hankaku + *s * 16);
		x += 8;

Then it explains the GDT (Global Descriptor Table), the IDT (Interrupt Descriptor Table), segmentation, interrupts, etc., which are needed to control devices such as a mouse, keyboard or NIC.

Screen Shot 2014-10-13 at 22.22.37

Golang calling convention

I confirmed that the caller allocates stack space for the arguments and the return value, and the callee uses it as shown below. It didn’t use rcx, rdx, r8, r9 for args or rax for the return value (different from the x86_64 calling convention).

Sample Function:

  9 c := f1(5,6)
 15 func f1(a int, b int) int {
 16   return f2(a, b)
 17 }

Compiled Code:

// c := f1(5,6)
   0x000000000000228f <+191>:	mov    QWORD PTR [rsp],0x5
   0x0000000000002297 <+199>:	mov    QWORD PTR [rsp+0x8],0x6
   0x00000000000022a0 <+208>:	call   0x2000 
   0x00000000000022a5 <+213>:	mov    rcx,QWORD PTR [rsp+0x10]        :	mov    rcx,QWORD PTR gs:0x8a0
   0x0000000000002009 <+9>:	cmp    rsp,QWORD PTR [rcx]
   0x000000000000200c <+12>:	ja     0x2015 <main.f1+21>
   0x000000000000200e <+14>:	call   0x27fb0 
   0x0000000000002013 <+19>:	jmp    0x2000 
   0x0000000000002015 <+21>:	sub    rsp,0x18                         :	mov    rbx,QWORD PTR [rsp+0x20]
   0x000000000000201e <+30>:	mov    QWORD PTR [rsp],rbx              :	mov    rbx,QWORD PTR [rsp+0x28]
   0x0000000000002027 <+39>:	mov    QWORD PTR [rsp+0x8],rbx          :	call   0x2040 
   0x0000000000002031 <+49>:	mov    rbx,QWORD PTR [rsp+0x10]         :	mov    QWORD PTR [rsp+0x30],rbx
   0x000000000000203b <+59>:	add    rsp,0x18
   0x000000000000203f <+63>:	ret    

Make your own OS in 30 days – day4: VRAM

Day 4 initializes the palette and writes directly into VRAM in C.
To set the palette,

  1. disable interrupts (call CLI = CLear Interrupt flag to reset it)
  2. write the palette number into 0x03c8
  3. write R, G, B into 0x03c9 in that order
  4. restore the interrupt flag

Since the interrupt flag is bit 9 of EFLAGS, restoring the saved EFLAGS value also restores the interrupt flag, so you don’t need to call STI.

void set_palette(int start, int end, unsigned char *rgb)
	int i, eflags;
	eflags = io_load_eflags();	/* save the current eflags */
	io_cli(); 			/* disable interrupts */
	io_out8(0x03c8, start);		/* send data to the device 3c8 */
	for (i = start; i <= end; i++) {
		io_out8(0x03c9, rgb[0] / 4);  /* send data to the device 3c9 */
		io_out8(0x03c9, rgb[1] / 4);
		io_out8(0x03c9, rgb[2] / 4);
		rgb += 3;
	io_store_eflags(eflags);	/* reset eflags */

io_load_eflags, io_store_eflags, io_cli and io_out8 are written as below.


		MOV		EDX,[ESP+4]		; port
		MOV		AL,[ESP+8]		; data


		MOV		EAX,[ESP+4]

Now we are ready to write into VRAM (which starts at 0xa0000).

    vram = (char *) 0xa0000;
    xsize = 320;
    ysize = 200;
    boxfill8(vram, xsize, COL8_008484,  0,         0,          xsize -  1, ysize - 29);

void boxfill8(unsigned char *vram, int xsize, unsigned char c, int x0, int y0, int x1, int y1)
    int x, y;
        for (y = y0; y <= y1; y++) {
            for (x = x0; x <= x1; x++)
                vram[y * xsize + x] = c;

When you draw several rectangles, it'll be like this.

Day 5 will draw characters and the mouse cursor which doesn't move, and learn segmentation and interrupt.

Make your own OS in 30 days – day3: 32bit mode and OS in C

We only wrote 16bit assembly language on day 1 and 2.
Day 3 started with the floppy disk architecture, illustrating cylinders, heads, sectors and drives, and then how to read from the disk.

MOV		AX,0x0820 		; loads it at 0x8200 (ES x 16 + BX)
MOV		CH,0			; cylinder 0
MOV		DH,0			; head 0
MOV		CL,2			; sector 2

MOV		AH,0x02			; AH=0x02 : read disk
MOV		AL,1			; read 1 sector
MOV		DL,0x00			; A drive
INT		0x13			; Disk BIOS call
JC		error

Int 13H is described here.
It explains segment registers, how to retry on error (when the carry flag is set), how to read 18 sectors, and how to read 18 sectors x 2 heads x 10 cylinders into memory.

When you compile this very simple OS code

    JMP		fin

It’ll be compiled to the 3 byte code.

f4 eb fd

Then when you make a disk image file, it’ll be placed at 0x4200.

0004200: f4eb fd00 0000 0000 0000 0000 0000 0000
0004210: 0000 0000 0000 0000 0000 0000 0000 0000
0004220: 0000 0000 0000 0000 0000 0000 0000 0000
0004230: 0000 0000 0000 0000 0000 0000 0000 0000

Since the bootstrap loads the first byte of sector 2 at 0x8200 (first byte of sector 1 is at 0x8000), the 3byte OS code at 0x4200 will be loaded at 0x8000 + 0x4200 = 0xc200.

So, “ORG 0xc200” is added to the OS code,

    ORG 0xc200

    JMP		fin

then it adds “JMP 0xc200” at the end of the IPL. The IPL will then JMP to the OS (the HLT code) after loading the sectors into memory.

Just doing HLT is not interesting. The book changes the OS to this which fills the screen in black.

CYLS	EQU		0x0ff0			; set by the bootsector
LEDS	EQU		0x0ff1
VMODE	EQU		0x0ff2			; color bitness
SCRNX	EQU		0x0ff4			; X resolution
SCRNY	EQU		0x0ff6			; Y resolution
VRAM	EQU		0x0ff8			; VRAM

	ORG		0xc200
	MOV		AL,0x13			; VGA, 320x200x8bit color
	MOV		AH,0x00
	INT		0x10
	MOV		BYTE [VMODE],8	; screen mode
	MOV		DWORD [VRAM],0x000a0000

; Get Keyboard's LED state from BIOS
	MOV		AH,0x02
	INT		0x16 			; keyboard BIOS

	JMP		fin

We are getting into 32bit mode & C language from here. Now we have 4 source files.
ipl10.nas: bootstrap code (x86 16bit assembly)
asmhead.nas: OS loader called by bootstrap (x86 32bit assembly, it’s moving into 32bit mode and loads OS code written in C. Not explained as of Day 3.)
bootpack.c: OS code (C)
naskfunc.nas: functions called by bootpack.c (x86 32bit assembly)


void io_hlt(void);

void HariMain(void)
	io_hlt(); /* calls _io_hlt in naskfunc.nas */
	goto fin;



; naskfunc
; TAB=4

[FORMAT "WCOFF"]				; create an object file
[BITS 32]						; 32bit mode

[FILE "naskfunc.nas"]			; source file name
		GLOBAL	_io_hlt			; function name included in this file

[SECTION .text]		; text section

_io_hlt:	; void io_hlt(void);

When you boot it, ipl10 loads the image into memory and JMPs to 0xC200 (asmhead.nas), which fills the screen in black and calls bootpack.c, which halts.


Day 4 will do more in C 🙂

Make your own OS in 30 days – day2: Assembly Language

Day 2 changes the previous one into x86 assembly code as below. That will go into the boot sector and the other sectors are created by the author’s image edit tool.

; hello-os
; TAB=4

ORG		0x7c00			; Where the program is loaded
						; If ORG is available, $ will be the address of the current code
						; 0x00007c00 - 0x00007dff : boot sector should be loaded here

; For standard FAT12 floppy disk
; --- boot sector ---

JMP		entry
DB		0x90
DB		"HELLOIPL"		; Boot sector name (8bytes)
DW		512			; Sector size (must be 512)
DB		1			; Cluster size (must be 1)
DW		1			; Where FAT begins (usually 1)
DB		2			; Number of FATs (must be 2)
DW		224			; Root directory size (usually 224 entries)
DW		2880			; Drive size (must be 2880 sectors)
DB		0xf0			; Media type(must be 0xf0)
DW		9			; Length of FAT area(must be 9 sectors)
DW		18			; Sectors per track(must be 18)
DW		2			; Number of heads (must be 2)
DD		0			; Must be 0 since there is no partition
DD		2880			; Drive size again
DB		0,0,0x29		; Not sure
DD		0xffffffff		; Probably volume serial number
DB		"HELLO-OS   "		; Name of the disc (11 bytes)
DB		"FAT12   "		; Name of the format (8 bytes)
RESB	18				; Reserve 18 bytes

; program body

MOV		AX,0			; init registers
MOV		SP,0x7c00

MOV		SI,msg
JE		fin
MOV		AH,0x0e			; Function to display a character
MOV		BX,15			; Color code
INT		0x10			; Video BIOS call (INT 0x10), see
JMP		putloop
HLT					; Halt CPU when something happens
JMP		fin			; Infinite loop

DB		0x0a, 0x0a		; LF x 2
DB		"hello, koide"
DB		0x0a			; LF
DB		0

RESB	0x7dfe-$			; Pad 0x00 until 0x7dfe

DB		0x55, 0xaa

We are making it 32bit on Day 3.

Make your own OS in 30 days – day1: How PC starts up

I bought a very nice book, Make your own OS in 30 days.

Someone created a Mac version here.

When you type below in helloos0.img and boot from there,

    1 0000000: eb4e 9048 454c 4c4f 4950 4c00 0201 0100  .N.HELLOIPL.....                                                                                              
    2 0000010: 02e0 0040 0bf0 0900 1200 0200 0000 0000  ...@............
    3 0000020: 400b 0000 0000 29ff ffff ff48 454c 4c4f  @.....)....HELLO
    4 0000030: 2d4f 5320 2020 4641 5431 3220 2020 0000  -OS   FAT12   ..
    5 0000040: 0000 0000 0000 0000 0000 0000 0000 0000  ................
    6 0000050: b800 008e d0bc 007c 8ed8 8ec0 be74 7c8a  .......|.....t|.
    7 0000060: 0483 c601 3c00 7409 b40e bb0f 00cd 10eb  ....

It starts up and shows “hello, world”.


The image was created by compiling the following nas file. A nas file is a source file for nask, an assembler the author created by copying the popular assembler nasm.

; hello-os
; TAB=4

; For standard FAT12 floppy disk

; --- boot sector ---

DB		0xeb, 0x4e, 0x90
DB		"HELLOIPL"		; Boot sector name (8bytes)
DW		512			; Sector size (must be 512)
DB		1			; Cluster size (must be 1)
DW		1			; Where FAT begins (usually 1)
DB		2			; Number of FATs (must be 2)
DW		224			; Root directory size (usually 224 entries)
DW		2880			; Drive size (must be 2880 sectors)
DB		0xf0			; Media type(must be 0xf0)
DW		9			; Length of FAT area(must be 9 sectors)
DW		18			; Sectors per track(must be 18)
DW		2			; Number of heads (must be 2)
DD		0			; Must be 0 since there is no partition
DD		2880			; Drive size again
DB		0,0,0x29		; Not sure
DD		0xffffffff		; Probably volume serial number
DB		"HELLO-OS   "		; Name of the disc (11 bytes)
DB		"FAT12   "		; Name of the format (8 bytes)
RESB	18				; Reserve 18 bytes

; Program body

DB		0xb8, 0x00, 0x00, 0x8e, 0xd0, 0xbc, 0x00, 0x7c
DB		0x8e, 0xd8, 0x8e, 0xc0, 0xbe, 0x74, 0x7c, 0x8a
DB		0x04, 0x83, 0xc6, 0x01, 0x3c, 0x00, 0x74, 0x09
DB		0xb4, 0x0e, 0xbb, 0x0f, 0x00, 0xcd, 0x10, 0xeb
DB		0xee, 0xf4, 0xeb, 0xfd

; Messages

DB		0x0a, 0x0a		; LF x 2
DB		"hello, koide"
DB		0x0a			; LF
DB		0

RESB	0x1fe-$				; Pad 0x00 until 0x001fe

DB		0x55, 0xaa

; --- end of boot sector ---
; --- 2nd sector and later ---

DB		0xf0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00
RESB	4600
DB		0xf0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00
RESB	1469432
; --- end of the image

When a PC starts up, it reads the boot sector of the disk. If the word at 0x1fe is ’55aa’, it assumes it’s a bootable device and continues. According to the author, this was decided by the PC designers and he doesn’t know why it’s 55aa.

We are changing the DB,DW,DD into more meaningful instructions on day 2.

GCC Inline Assembly

From here, here and here

#include <stdio.h>

int func(int a, int b); 
void cpuid(int info, int *eax, int *ebx, int *ecx, int *edx);

int func(int a, int b)
	int out = 0;

		"movl %1, %%eax;"
		"addl %2, %%eax;"
		"movl %%eax, %0;"
		:"=r"(out)      /* output */
		:"r"(a),"r"(b)▸ /* input */
		:"eax"▸ 		/* clobbered register */
	return out;

void cpuid(int info, int *eax, int *ebx, int *ecx, int *edx)
	*eax = info;
		"movl %%ebx, %%edi;"	/* 32bit PIC: don't clobber ebx */
		"movl %%ebx, %%esi;"
		"movl %%edi, %%ebx;"

int main(int args, char* argv[])
	int eax, ebx, ecx, edx;
	printf("ret=%d\n", func(3,5));
	cpuid(1, &eax, &ebx, &ecx, &edx);
	printf("eax:0x%08x, ebx:0x%08x, ecx:0x%08x, edx:0x%08x\n", eax, ebx, ecx, edx);
	printf("stepping:        0x%x\n", eax & 0xF);
	printf("model:           0x%x\n", (eax>>4) & 0xF);
	printf("family:          0x%x\n", (eax>>8) & 0xF);
	printf("processor type:  0x%x\n", (eax>>12) & 0x3);
	printf("extended model:  0x%x\n", (eax>>16) & 0xF);
	printf("extended family: 0x%x\n", (eax>>20) & 0xFF);
	return 0;


eax:0x000206a7, ebx:0x00100800, ecx:0x1fbae3ff, edx:0xbfebfbff
stepping:        0x7
model:           0xa
family:          0x6
processor type:  0x0
extended model:  0x2
extended family: 0x0

x86_64 calling convention

As written in Wikipedia, Linux/MacOS X uses RDI, RSI, RDX, RCX, R8, and R9 for the first 6 args (and XMM0-7 for floats) + the stack for the others.

Windows uses RCX, RDX, R8, R9 (uses XMM0-3 for float) + stack.

#include <iostream>

using namespace std;

int foo(int a, int b, int c, int d, int e, int f, int g){
  int r = a+b+c+d+e+f+g;
  return r;

int main(int argc, char const* argv[])
  cout << foo(1,2,3,4,5,6,7);
  return 0;

MacOS X 10.9:

(lldb) disassemble --name main
foo[0x100000ec0]:  push   rbp
foo[0x100000ec1]:  mov    rbp, rsp
foo[0x100000ec4]:  push   rbx
foo[0x100000ec5]:  sub    rsp, 0x38
foo[0x100000ec9]:  mov    eax, 0x1
foo[0x100000ece]:  mov    ecx, 0x2
foo[0x100000ed3]:  mov    edx, 0x3
foo[0x100000ed8]:  mov    r8d, 0x4
foo[0x100000ede]:  mov    r9d, 0x5
foo[0x100000ee4]:  mov    r10d, 0x6
foo[0x100000eea]:  mov    r11d, 0x7
foo[0x100000ef0]:  mov    rbx, qword ptr [rip + 0x121] ; (void *)0x0000000000000000
foo[0x100000ef7]:  mov    rbx, qword ptr [rbx]
foo[0x100000efa]:  mov    qword ptr [rbp - 0x10], rbx
foo[0x100000efe]:  mov    dword ptr [rbp - 0x14], 0x0
foo[0x100000f05]:  mov    dword ptr [rbp - 0x18], edi
foo[0x100000f08]:  mov    qword ptr [rbp - 0x20], rsi
foo[0x100000f0c]:  mov    edi, eax
foo[0x100000f0e]:  mov    esi, ecx
foo[0x100000f10]:  mov    ecx, r8d
foo[0x100000f13]:  mov    r8d, r9d
foo[0x100000f16]:  mov    r9d, r10d
foo[0x100000f19]:  mov    dword ptr [rsp], 0x7
foo[0x100000f20]:  mov    dword ptr [rbp - 0x24], r11d
foo[0x100000f24]:  call   0x100000e50               ; foo(int, int, int, int, int, int, int)
foo[0x100000f29]:  mov    rdi, qword ptr [rip + 0xe0] ; (void *)0x0000000000000000
foo[0x100000f30]:  mov    esi, eax
foo[0x100000f32]:  call   0x100000f60               ; symbol stub for: std::__1::basic_ostream<char, std::__1::char_traits<char> >::operator<<(int)
foo[0x100000f37]:  mov    rdi, qword ptr [rip + 0xda] ; (void *)0x0000000000000000
foo[0x100000f3e]:  mov    rdi, qword ptr [rdi]
foo[0x100000f41]:  cmp    rdi, qword ptr [rbp - 0x10]
foo[0x100000f45]:  mov    qword ptr [rbp - 0x30], rax
foo[0x100000f49]:  jne    0x100000f5b               ; main + 155
foo[0x100000f4f]:  mov    eax, 0x0
foo[0x100000f54]:  add    rsp, 0x38
foo[0x100000f58]:  pop    rbx
foo[0x100000f59]:  pop    rbp
foo[0x100000f5a]:  ret    
foo[0x100000f5b]:  call   0x100000f66               ; symbol stub for: __stack_chk_fail
(lldb) disassemble --name foo
foo`foo(int, int, int, int, int, int, int):
foo[0x100000e50]:  push   rbp
foo[0x100000e51]:  mov    rbp, rsp
foo[0x100000e54]:  sub    rsp, 0x30
foo[0x100000e58]:  mov    eax, dword ptr [rbp + 0x10]
foo[0x100000e5b]:  mov    r10, qword ptr [rip + 0x1b6] ; (void *)0x0000000000000000
foo[0x100000e62]:  mov    r11, qword ptr [r10]
foo[0x100000e65]:  mov    qword ptr [rbp - 0x8], r11
foo[0x100000e69]:  mov    dword ptr [rbp - 0xc], edi
foo[0x100000e6c]:  mov    dword ptr [rbp - 0x10], esi
foo[0x100000e6f]:  mov    dword ptr [rbp - 0x14], edx
foo[0x100000e72]:  mov    dword ptr [rbp - 0x18], ecx
foo[0x100000e75]:  mov    dword ptr [rbp - 0x1c], r8d
foo[0x100000e79]:  mov    dword ptr [rbp - 0x20], r9d
foo[0x100000e7d]:  mov    dword ptr [rbp - 0x24], eax
foo[0x100000e80]:  mov    eax, dword ptr [rbp - 0xc]
foo[0x100000e83]:  add    eax, dword ptr [rbp - 0x10]
foo[0x100000e86]:  add    eax, dword ptr [rbp - 0x14]
foo[0x100000e89]:  add    eax, dword ptr [rbp - 0x18]
foo[0x100000e8c]:  add    eax, dword ptr [rbp - 0x1c]
foo[0x100000e8f]:  add    eax, dword ptr [rbp - 0x20]
foo[0x100000e92]:  add    eax, dword ptr [rbp - 0x24]
foo[0x100000e95]:  mov    dword ptr [rbp - 0x28], eax
foo[0x100000e98]:  mov    eax, dword ptr [rbp - 0x28]
foo[0x100000e9b]:  mov    r10, qword ptr [r10]
foo[0x100000e9e]:  cmp    r10, qword ptr [rbp - 0x8]
foo[0x100000ea2]:  mov    dword ptr [rbp - 0x2c], eax
foo[0x100000ea5]:  jne    0x100000eb4               ; foo(int, int, int, int, int, int, int) + 100
foo[0x100000eab]:  mov    eax, dword ptr [rbp - 0x2c]
foo[0x100000eae]:  add    rsp, 0x30
foo[0x100000eb2]:  pop    rbp
foo[0x100000eb3]:  ret    
foo[0x100000eb4]:  call   0x100000f66               ; symbol stub for: __stack_chk_fail
foo[0x100000eb9]:  nop    dword ptr [rax]

Windows 8:

0:000> uf foo!main (int, char **)
foo!main [c:\users\sokoide\projects\spike\foo\foo.cpp @ 16]:
   16 00007ff7`fbfd2400 4889542410      mov     qword ptr [rsp+10h],rdx
   16 00007ff7`fbfd2405 894c2408        mov     dword ptr [rsp+8],ecx
   16 00007ff7`fbfd2409 57              push    rdi
   16 00007ff7`fbfd240a 4883ec40        sub     rsp,40h
   16 00007ff7`fbfd240e 488bfc          mov     rdi,rsp
   16 00007ff7`fbfd2411 b910000000      mov     ecx,10h
   16 00007ff7`fbfd2416 b8cccccccc      mov     eax,0CCCCCCCCh
   16 00007ff7`fbfd241b f3ab            rep stos dword ptr [rdi]
   16 00007ff7`fbfd241d 8b4c2450        mov     ecx,dword ptr [rsp+50h]
   17 00007ff7`fbfd2421 c744243007000000 mov     dword ptr [rsp+30h],7
   17 00007ff7`fbfd2429 c744242806000000 mov     dword ptr [rsp+28h],6
   17 00007ff7`fbfd2431 c744242005000000 mov     dword ptr [rsp+20h],5
   17 00007ff7`fbfd2439 41b904000000    mov     r9d,4
   17 00007ff7`fbfd243f 41b803000000    mov     r8d,3
   17 00007ff7`fbfd2445 ba02000000      mov     edx,2
   17 00007ff7`fbfd244a b901000000      mov     ecx,1
   17 00007ff7`fbfd244f e8f6ecffff      call    foo!ILT+325(?fooYAHHHHHHHHZ) (00007ff7`fbfd114a)
   17 00007ff7`fbfd2454 8bd0            mov     edx,eax
   17 00007ff7`fbfd2456 488b0dc3ec0000  mov     rcx,qword ptr [foo!_imp_?coutstd (00007ff7`fbfe1120)]
   17 00007ff7`fbfd245d ff15c5ec0000    call    qword ptr [foo!_imp_??6?$basic_ostreamDU?$char_traitsDstdstdQEAAAEAV01HZ (00007ff7`fbfe1128)]
   18 00007ff7`fbfd2463 33c0            xor     eax,eax
   19 00007ff7`fbfd2465 4883c440        add     rsp,40h
   19 00007ff7`fbfd2469 5f              pop     rdi
   19 00007ff7`fbfd246a c3              ret
                   ^ Extra character error in 'uf foo!main (int, char **)'
0:000> uf foo!foo (int, int, int, int, int, int, int)
foo!foo [c:\users\sokoide\projects\spike\foo\foo.cpp @ 10]:
   10 00007ff7`fbfd23a0 44894c2420      mov     dword ptr [rsp+20h],r9d
   10 00007ff7`fbfd23a5 4489442418      mov     dword ptr [rsp+18h],r8d
   10 00007ff7`fbfd23aa 89542410        mov     dword ptr [rsp+10h],edx
   10 00007ff7`fbfd23ae 894c2408        mov     dword ptr [rsp+8],ecx
   10 00007ff7`fbfd23b2 57              push    rdi
   10 00007ff7`fbfd23b3 4883ec10        sub     rsp,10h
   10 00007ff7`fbfd23b7 488bfc          mov     rdi,rsp
   10 00007ff7`fbfd23ba b904000000      mov     ecx,4
   10 00007ff7`fbfd23bf b8cccccccc      mov     eax,0CCCCCCCCh
   10 00007ff7`fbfd23c4 f3ab            rep stos dword ptr [rdi]
   10 00007ff7`fbfd23c6 8b4c2420        mov     ecx,dword ptr [rsp+20h]
   11 00007ff7`fbfd23ca 8b442428        mov     eax,dword ptr [rsp+28h]
   11 00007ff7`fbfd23ce 8b4c2420        mov     ecx,dword ptr [rsp+20h]
   11 00007ff7`fbfd23d2 03c8            add     ecx,eax
   11 00007ff7`fbfd23d4 8bc1            mov     eax,ecx
   11 00007ff7`fbfd23d6 03442430        add     eax,dword ptr [rsp+30h]
   11 00007ff7`fbfd23da 03442438        add     eax,dword ptr [rsp+38h]
   11 00007ff7`fbfd23de 03442440        add     eax,dword ptr [rsp+40h]
   11 00007ff7`fbfd23e2 03442448        add     eax,dword ptr [rsp+48h]
   11 00007ff7`fbfd23e6 03442450        add     eax,dword ptr [rsp+50h]
   11 00007ff7`fbfd23ea 890424          mov     dword ptr [rsp],eax
   12 00007ff7`fbfd23ed 8b0424          mov     eax,dword ptr [rsp]
   13 00007ff7`fbfd23f0 4883c410        add     rsp,10h
   13 00007ff7`fbfd23f4 5f              pop     rdi
   13 00007ff7`fbfd23f5 c3              ret
                  ^ Extra character error in 'uf foo!foo (int, int, int, int, int, int, int)'

ETW test

Created a test PHP page which simply sleeps for a specified number of seconds, and called it from Windows with ETW turned on.

On Mac: test.php

$delay = $_GET["delay"];
echo date('h:i:s') . "<br/>\n";

On Windows:

xperf.exe -on PROC_THREAD+LOADER+Base+CSWITCH+DISPATCHER -stackwalk Profile+CSwitch+ReadyThread

run the test:
Open Powershell Window and run -
 $wc = New-Object System.Net.WebClient

xperf.exe -stop -d %temp%\hoge.etl

see the trace:
xperf.exe %temp%\hoge.etl

It traced in kernel mode when the user mode thread waited on the sync object.

1 2 3 4 5 6 8