turnstile/xen-patch/turnstile.c
2026-01-15 17:08:51 +00:00

617 lines
15 KiB
C

#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/mm.h>
#include <xen/errno.h>
#include <xen/lib.h>
#include <asm/p2m.h>
#include <asm/turnstile.h>
/*
 * Fetch the per-domain turnstile state stored in d->arch.turnstile.
 * The pointer is returned unchanged; callers in this file treat NULL as
 * "turnstile not set up for this domain".
 */
static struct turnstile_domain_state *get_turnstile_state(struct domain *d)
{
    struct turnstile_domain_state *ts = d->arch.turnstile;

    return ts;
}
/*
 * Allocate and attach a fresh turnstile state to domain @d.
 *
 * The new state starts with the policy disabled, an empty violation ring,
 * no protected ranges, no uploaded metadata tables and no write grant.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.  Any state
 * already attached to @d is overwritten without being freed, so callers
 * must only invoke this when d->arch.turnstile is NULL.
 */
int turnstile_domain_init(struct domain *d)
{
    struct turnstile_domain_state *ts = xzalloc(struct turnstile_domain_state);

    if ( ts == NULL )
        return -ENOMEM;

    rwlock_init(&ts->lock);

    /* Monitoring stays off until a policy is set explicitly. */
    ts->policy = TURNSTILE_POLICY_DISABLED;

    /* Empty violation ring. */
    ts->ring_head = 0;
    ts->ring_tail = 0;
    ts->ring_seq = 0;
    ts->overflow_count = 0;

    /* No protected ranges yet. */
    ts->num_ranges = 0;

    /* No metadata tables uploaded yet. */
    ts->func_entries = NULL;
    ts->num_func_entries = 0;
    ts->jump_entries = NULL;
    ts->num_jump_entries = 0;

    /* No write window outstanding. */
    ts->write_grant.active = false;

    d->arch.turnstile = ts;

    return 0;
}
/*
 * Release the turnstile state attached to domain @d, if any.
 *
 * Frees both uploaded metadata tables and the state structure itself.
 * Safe to call when no state was ever attached.
 */
void turnstile_domain_destroy(struct domain *d)
{
    struct turnstile_domain_state *state = get_turnstile_state(d);

    if ( !state )
        return;

    /*
     * Unpublish the pointer before freeing so that d->arch.turnstile never
     * refers to memory that has already been handed back to the allocator.
     * NOTE(review): this does not by itself protect users that fetched the
     * pointer earlier; callers must ensure the domain is quiescent.
     */
    d->arch.turnstile = NULL;

    xfree(state->func_entries);
    xfree(state->jump_entries);
    xfree(state);
}
/*
 * Report whether guest-physical address @gpa lies inside any active
 * protected range, i.e. gpa_start <= gpa < gpa_end for some active slot
 * among the first state->num_ranges slots.
 *
 * The function takes no lock itself; callers in this file hold state->lock.
 */
static bool gpa_in_range(struct turnstile_domain_state *state, uint64_t gpa)
{
    unsigned int idx = 0;

    while ( idx < state->num_ranges )
    {
        if ( state->ranges[idx].active &&
             state->ranges[idx].gpa_start <= gpa &&
             gpa < state->ranges[idx].gpa_end )
            return true;

        idx++;
    }

    return false;
}
/*
 * Decide whether the current write grant authorises an access to @gpa.
 *
 * Returns true only if a grant is active, has not passed its expiry time,
 * and @gpa falls in [gpa_start, gpa_end) of the grant.
 *
 * *grant_expired is always written: true when an active grant was found to
 * be past its expiry, false in every other case.  The grant itself is not
 * modified here.
 */
static bool check_write_grant(struct turnstile_domain_state *state, uint64_t gpa,
                              bool *grant_expired)
{
    *grant_expired = false;

    if ( state->write_grant.active )
    {
        if ( NOW() > state->write_grant.expiry )
            *grant_expired = true;
        else if ( gpa >= state->write_grant.gpa_start &&
                  gpa < state->write_grant.gpa_end )
            return true;
    }

    return false;
}
/*
 * Check whether a write at @gpa looks like ftrace patching a known function
 * entry.
 *
 * The address is looked up in the uploaded function-entry table after adding
 * a fixed 0xffff800000000000 offset to @gpa.
 * NOTE(review): this presumes the guest maps the patched code at that fixed
 * offset from its guest-physical address - confirm against the guest layout.
 *
 * If the address is listed, the leading bytes of @insn_bytes must be one of:
 *   e8              (x86 call rel32 opcode)
 *   0f 1f 44 00 00  (5-byte NOP)
 *   90              (1-byte NOP)
 *   66 90           (2-byte NOP)
 *
 * Returns TURNSTILE_IMPLICIT_FTRACE on a match, otherwise
 * TURNSTILE_IMPLICIT_DENIED.  @insn_bytes is only read once the address has
 * matched, and at most its first five bytes are examined.
 */
static int check_implicit_ftrace(struct turnstile_domain_state *state,
                                 uint64_t gpa, const uint8_t *insn_bytes)
{
    const uint64_t target = gpa + 0xffff800000000000ULL;
    unsigned int idx;

    /* Locate the address in the function-entry table first. */
    for ( idx = 0; idx < state->num_func_entries; idx++ )
        if ( state->func_entries[idx] == target )
            break;

    if ( idx == state->num_func_entries )
        return TURNSTILE_IMPLICIT_DENIED;

    /* call rel32 */
    if ( insn_bytes[0] == 0xe8 )
        return TURNSTILE_IMPLICIT_FTRACE;

    /* 5-byte NOP */
    if ( insn_bytes[0] == 0x0f && insn_bytes[1] == 0x1f &&
         insn_bytes[2] == 0x44 && insn_bytes[3] == 0x00 &&
         insn_bytes[4] == 0x00 )
        return TURNSTILE_IMPLICIT_FTRACE;

    /* 1-byte or 2-byte NOP */
    if ( insn_bytes[0] == 0x90 ||
         (insn_bytes[0] == 0x66 && insn_bytes[1] == 0x90) )
        return TURNSTILE_IMPLICIT_FTRACE;

    return TURNSTILE_IMPLICIT_DENIED;
}
/*
 * Check whether a write at @gpa looks like a static-key (jump label) site
 * being patched.
 *
 * The uploaded jump table is an array of 3-word records; only the first word
 * of each record (the code address) is consulted here.  The lookup key is
 * @gpa plus a fixed 0xffff800000000000 offset.
 * NOTE(review): this presumes the guest maps the patched code at that fixed
 * offset from its guest-physical address - confirm against the guest layout.
 *
 * If the address is listed, the leading bytes of @insn_bytes must be one of:
 *   0f 1f ..  (multi-byte NOP prefix)
 *   e9        (jmp rel32)
 *   eb        (jmp rel8)
 *   90        (1-byte NOP)
 *   66 90     (2-byte NOP)
 *
 * Returns TURNSTILE_IMPLICIT_STATIC on a match, otherwise
 * TURNSTILE_IMPLICIT_DENIED.  @insn_bytes is only read once the address has
 * matched, and at most its first two bytes are examined.
 */
static int check_implicit_static_key(struct turnstile_domain_state *state,
                                     uint64_t gpa, const uint8_t *insn_bytes)
{
    const uint64_t target = gpa + 0xffff800000000000ULL;
    unsigned int rec;

    /* Find a jump record whose code address is the written location. */
    for ( rec = 0; rec < state->num_jump_entries; rec++ )
        if ( state->jump_entries[rec * 3] == target )
            break;

    if ( rec == state->num_jump_entries )
        return TURNSTILE_IMPLICIT_DENIED;

    switch ( insn_bytes[0] )
    {
    case 0xe9: /* jmp rel32 */
    case 0xeb: /* jmp rel8 */
    case 0x90: /* nop */
        return TURNSTILE_IMPLICIT_STATIC;

    case 0x0f: /* multi-byte NOP: 0f 1f ... */
        if ( insn_bytes[1] == 0x1f )
            return TURNSTILE_IMPLICIT_STATIC;
        break;

    case 0x66: /* 2-byte NOP: 66 90 */
        if ( insn_bytes[1] == 0x90 )
            return TURNSTILE_IMPLICIT_STATIC;
        break;

    default:
        break;
    }

    return TURNSTILE_IMPLICIT_DENIED;
}
/*
 * Append one violation record to the per-domain ring and update statistics.
 *
 * When the ring is full, the oldest unread entry is discarded (the tail is
 * advanced) and both overflow counters are incremented.  The record receives
 * the current time, the next sequence number, and a copy of the first 16
 * bytes of @insn_bytes.
 *
 * @response non-zero counts the event as blocked, zero as allowed.
 *
 * No locking is done here; the caller in this file holds state->lock for
 * writing.
 */
static void log_violation(struct turnstile_domain_state *state,
                          uint64_t gpa, uint64_t rip, uint64_t cr3,
                          uint32_t access_flags, uint32_t response,
                          const uint8_t *insn_bytes)
{
    const uint32_t slot = state->ring_head;
    const uint32_t following = (slot + 1) % TURNSTILE_RING_SIZE;
    struct turnstile_violation *rec = &state->ring[slot];

    /* Ring full: drop the oldest entry to make room. */
    if ( following == state->ring_tail )
    {
        state->ring_tail = (state->ring_tail + 1) % TURNSTILE_RING_SIZE;
        state->overflow_count++;
        state->stats.ring_overflows++;
    }

    rec->timestamp = NOW();
    rec->seq = state->ring_seq++;
    rec->gpa = gpa;
    rec->rip = rip;
    rec->cr3 = cr3;
    rec->access_flags = access_flags;
    rec->response = response;
    memcpy(rec->insn_bytes, insn_bytes, 16);

    /* Publish the record by moving the head past it. */
    state->ring_head = following;

    state->stats.violations_total++;
    if ( response == 0 )
        state->stats.violations_allowed++;
    else
        state->stats.violations_blocked++;
}
/*
 * Evaluate an access to @gpa in domain @d against the turnstile policy.
 *
 * @rip, @cr3 and @access_flags are only recorded in the violation log.
 * @insn_bytes is the instruction byte buffer for the access; up to five
 * bytes are inspected by the implicit checks and 16 bytes are copied when a
 * violation is logged.
 *
 * Decision order:
 *   1. No state, or policy disabled           -> allow.
 *   2. @gpa outside every protected range     -> allow.
 *   3. Covered by a live write grant          -> allow, count the grant use.
 *   4. Recognised ftrace patch                -> allow, count it.
 *   5. Recognised static-key patch            -> allow, count it.
 *   6. Otherwise log a violation; block only under TURNSTILE_POLICY_ENFORCE.
 *
 * Returns 1 if the access must be blocked, 0 otherwise.
 *
 * Locking: the checks run under the read lock.  Counters and the log are
 * updated under the write lock, which is taken only after the read lock has
 * been released, so state may change between the two.
 */
int turnstile_check_violation(struct domain *d, uint64_t gpa, uint64_t rip,
                              uint64_t cr3, uint32_t access_flags,
                              const uint8_t *insn_bytes)
{
    struct turnstile_domain_state *state = get_turnstile_state(d);
    int implicit;
    int block = 0;
    bool in_range;
    bool grant_valid;
    bool grant_expired;
    uint32_t policy;

    /* Unlocked fast path for domains that are not being monitored. */
    if ( !state || state->policy == TURNSTILE_POLICY_DISABLED )
        return 0;

    read_lock(&state->lock);

    in_range = gpa_in_range(state, gpa);
    if ( !in_range )
    {
        read_unlock(&state->lock);
        return 0;
    }

    grant_valid = check_write_grant(state, gpa, &grant_expired);
    if ( grant_valid )
    {
        read_unlock(&state->lock);
        write_lock(&state->lock);
        state->stats.explicit_write_grants++;
        write_unlock(&state->lock);
        return 0;
    }

    implicit = check_implicit_ftrace(state, gpa, insn_bytes);
    if ( implicit == TURNSTILE_IMPLICIT_FTRACE )
    {
        read_unlock(&state->lock);
        write_lock(&state->lock);
        state->stats.implicit_ftrace++;
        write_unlock(&state->lock);
        return 0;
    }

    implicit = check_implicit_static_key(state, gpa, insn_bytes);
    if ( implicit == TURNSTILE_IMPLICIT_STATIC )
    {
        read_unlock(&state->lock);
        write_lock(&state->lock);
        state->stats.implicit_static_key++;
        write_unlock(&state->lock);
        return 0;
    }

    policy = state->policy;
    read_unlock(&state->lock);

    if ( policy == TURNSTILE_POLICY_ENFORCE )
        block = 1;

    write_lock(&state->lock);

    /*
     * grant_expired was sampled under the read lock, which has since been
     * dropped.  A new grant may have been installed in that window, so only
     * retire the grant if it is still active and still past its expiry now;
     * otherwise a freshly issued grant would be revoked by mistake.
     */
    if ( grant_expired && state->write_grant.active &&
         NOW() > state->write_grant.expiry )
        state->write_grant.active = false;

    log_violation(state, gpa, rip, cr3, access_flags, block, insn_bytes);
    write_unlock(&state->lock);

    return block;
}
/*
 * TURNSTILE_OP_SET_POLICY handler: replace the domain's policy with the
 * value supplied in the guest's struct turnstile_op_set_policy.
 *
 * Returns 0 on success, -EINVAL if the domain has no turnstile state or the
 * requested policy is greater than TURNSTILE_POLICY_ENFORCE, and -EFAULT if
 * the argument cannot be read from the guest.
 */
static long turnstile_set_policy(struct domain *d,
                                 XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct turnstile_domain_state *ts = get_turnstile_state(d);
    struct turnstile_op_set_policy req;

    if ( ts == NULL )
        return -EINVAL;

    if ( copy_from_guest(&req, arg, 1) != 0 )
        return -EFAULT;

    /* TURNSTILE_POLICY_ENFORCE is the highest value accepted. */
    if ( req.policy > TURNSTILE_POLICY_ENFORCE )
        return -EINVAL;

    write_lock(&ts->lock);
    ts->policy = req.policy;
    write_unlock(&ts->lock);

    return 0;
}
/*
 * TURNSTILE_OP_PROTECT_RANGE handler: record [gpa_start, gpa_start + length)
 * as a protected range for the domain.
 *
 * Registering a range identical to an already active one succeeds without
 * consuming another slot.  Otherwise the first inactive slot among
 * TURNSTILE_MAX_RANGES is used and num_ranges is grown if needed.
 *
 * Returns:
 *   0        on success (including the duplicate case),
 *   -EINVAL  if there is no turnstile state, the length is zero, or
 *            gpa_start + length wraps around the address space,
 *   -EFAULT  if the argument cannot be read from the guest,
 *   -ENOSPC  if every slot is in use.
 *
 * NOTE(review): this function only updates the bookkeeping table; no p2m
 * change is made here even though original_type is recorded - confirm that
 * the actual page protection is applied elsewhere.
 */
static long turnstile_protect_range(struct domain *d,
                                    XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct turnstile_domain_state *state = get_turnstile_state(d);
    struct turnstile_op_protect_range op;
    uint64_t gpa_end;
    unsigned int i;
    int slot = -1;

    if ( !state )
        return -EINVAL;

    if ( copy_from_guest(&op, arg, 1) )
        return -EFAULT;

    if ( op.length == 0 )
        return -EINVAL;

    /*
     * Reject a range whose exclusive end wraps: it would be stored with
     * gpa_end <= gpa_start and could never match any address, silently
     * leaving the caller without the protection it asked for.
     */
    gpa_end = op.gpa_start + op.length;
    if ( gpa_end <= op.gpa_start )
        return -EINVAL;

    write_lock(&state->lock);

    /* An identical active range is already in place: nothing to do. */
    for ( i = 0; i < state->num_ranges; i++ )
    {
        if ( state->ranges[i].active &&
             state->ranges[i].gpa_start == op.gpa_start &&
             state->ranges[i].gpa_end == gpa_end )
        {
            write_unlock(&state->lock);
            return 0;
        }
    }

    /* Reuse the first free slot, including ones below num_ranges. */
    for ( i = 0; i < TURNSTILE_MAX_RANGES; i++ )
    {
        if ( !state->ranges[i].active )
        {
            slot = i;
            break;
        }
    }

    if ( slot < 0 )
    {
        write_unlock(&state->lock);
        return -ENOSPC;
    }

    state->ranges[slot].gpa_start = op.gpa_start;
    state->ranges[slot].gpa_end = gpa_end;
    state->ranges[slot].active = true;
    state->ranges[slot].original_type = p2m_ram_rw;

    /* num_ranges is the scan limit used by lookups; keep it past the slot. */
    if ( (unsigned int)slot >= state->num_ranges )
        state->num_ranges = slot + 1;

    write_unlock(&state->lock);

    return 0;
}
/*
 * TURNSTILE_OP_UNPROTECT_RANGE handler: deactivate the first active range
 * whose start address equals op.gpa_start.  Only the start address is
 * compared; the length in the request is not consulted.
 *
 * Returns 0 if a range was deactivated, -ENOENT if none matched, -EINVAL if
 * the domain has no turnstile state, and -EFAULT if the argument cannot be
 * read from the guest.
 */
static long turnstile_unprotect_range(struct domain *d,
                                      XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct turnstile_domain_state *ts = get_turnstile_state(d);
    struct turnstile_op_protect_range req;
    unsigned int idx;
    long rc = -ENOENT;

    if ( ts == NULL )
        return -EINVAL;

    if ( copy_from_guest(&req, arg, 1) != 0 )
        return -EFAULT;

    write_lock(&ts->lock);

    for ( idx = 0; idx < ts->num_ranges; idx++ )
    {
        if ( !ts->ranges[idx].active ||
             ts->ranges[idx].gpa_start != req.gpa_start )
            continue;

        /* The slot stays in the table and can be reused later. */
        ts->ranges[idx].active = false;
        rc = 0;
        break;
    }

    write_unlock(&ts->lock);

    return rc;
}
/*
 * TURNSTILE_OP_REQUEST_WRITE handler: install a time-limited write grant
 * covering [gpa_start, gpa_start + length).
 *
 * The domain has a single grant slot, so a new request replaces any grant
 * that was previously in place.  The grant expires timeout_ms milliseconds
 * after it is installed; timeouts above 1000 ms are refused.
 *
 * Returns:
 *   0        on success,
 *   -EINVAL  if there is no turnstile state, the timeout exceeds 1000 ms,
 *            the length is zero, or gpa_start + length wraps around,
 *   -EFAULT  if the argument cannot be read from the guest,
 *   -ENOENT  if gpa_start is not inside any active protected range.
 *
 * NOTE(review): only the first byte of the window is checked against the
 * protected ranges; the window may extend past that range - confirm this is
 * intended.
 */
static long turnstile_request_write(struct domain *d,
                                    XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct turnstile_domain_state *state = get_turnstile_state(d);
    struct turnstile_op_request_write op;
    uint64_t gpa_end;

    if ( !state )
        return -EINVAL;

    if ( copy_from_guest(&op, arg, 1) )
        return -EFAULT;

    if ( op.timeout_ms > 1000 )
        return -EINVAL;

    if ( op.length == 0 )
        return -EINVAL;

    /*
     * Refuse a window whose exclusive end wraps around.  It would be stored
     * with gpa_end <= gpa_start and match nothing, so report the bad request
     * instead of returning success for a grant that can never apply.
     */
    gpa_end = op.gpa_start + op.length;
    if ( gpa_end <= op.gpa_start )
        return -EINVAL;

    write_lock(&state->lock);

    if ( !gpa_in_range(state, op.gpa_start) )
    {
        write_unlock(&state->lock);
        return -ENOENT;
    }

    state->write_grant.gpa_start = op.gpa_start;
    state->write_grant.gpa_end = gpa_end;
    state->write_grant.expiry = NOW() + MILLISECS(op.timeout_ms);
    state->write_grant.active = true;

    write_unlock(&state->lock);

    return 0;
}
/*
 * TURNSTILE_OP_GET_VIOLATIONS handler: drain logged violations into a guest
 * buffer.
 *
 * Up to op.max_entries records are copied, oldest first, to consecutive
 * struct turnstile_violation slots starting at op.buffer_ptr.  Each record
 * is removed from the ring once it has been copied.  The request structure
 * is then written back with entries_returned and the overflow count
 * accumulated since the previous call, which is reset to zero.
 *
 * Returns 0 on success, -EINVAL if there is no turnstile state or
 * max_entries is zero, and -EFAULT if any guest copy fails.  On -EFAULT the
 * records copied before the failure have already been consumed.
 */
static long turnstile_get_violations(struct domain *d,
                                     XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct turnstile_domain_state *ts = get_turnstile_state(d);
    struct turnstile_op_get_violations req;
    void *guest_base;
    uint32_t copied;

    if ( ts == NULL )
        return -EINVAL;

    if ( copy_from_guest(&req, arg, 1) != 0 )
        return -EFAULT;

    if ( req.max_entries == 0 )
        return -EINVAL;

    guest_base = (void *)(unsigned long)req.buffer_ptr;

    write_lock(&ts->lock);

    for ( copied = 0;
          copied < req.max_entries && ts->ring_tail != ts->ring_head;
          copied++ )
    {
        struct turnstile_violation *rec = &ts->ring[ts->ring_tail];
        void *dest = (char *)guest_base +
                     (copied * sizeof(struct turnstile_violation));

        if ( raw_copy_to_guest(dest, rec, sizeof(struct turnstile_violation)) )
        {
            write_unlock(&ts->lock);
            return -EFAULT;
        }

        /* Consume the record only after it reached the guest. */
        ts->ring_tail = (ts->ring_tail + 1) % TURNSTILE_RING_SIZE;
    }

    req.entries_returned = copied;
    req.overflow_count = ts->overflow_count;
    ts->overflow_count = 0;

    write_unlock(&ts->lock);

    /* Report the counts back through the request structure. */
    if ( copy_to_guest(arg, &req, 1) != 0 )
        return -EFAULT;

    return 0;
}
/*
 * TURNSTILE_OP_GET_STATS handler: copy the domain's statistics counters to
 * the guest.
 *
 * The counters are copied to a local structure under the read lock and then
 * written to the guest after the lock has been released.
 *
 * Returns 0 on success, -EINVAL if the domain has no turnstile state, and
 * -EFAULT if the guest copy fails.
 */
static long turnstile_get_stats(struct domain *d,
                                XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct turnstile_domain_state *ts = get_turnstile_state(d);
    struct turnstile_stats snapshot;

    if ( ts == NULL )
        return -EINVAL;

    read_lock(&ts->lock);
    snapshot = ts->stats;
    read_unlock(&ts->lock);

    return copy_to_guest(arg, &snapshot, 1) ? -EFAULT : 0;
}
/*
 * TURNSTILE_OP_UPLOAD_METADATA handler: replace one of the domain's metadata
 * tables with data read from the guest buffer at op.buffer_ptr.
 *
 * Two table kinds are accepted:
 *   TURNSTILE_META_FUNC_ENTRIES - op.count 64-bit words, at most
 *                                 TURNSTILE_MAX_FUNC_ENTRIES entries;
 *   TURNSTILE_META_JUMP_ENTRIES - op.count records of three 64-bit words,
 *                                 at most TURNSTILE_MAX_JUMP_ENTRIES records.
 *
 * The new table is fully read before the lock is taken; the previous table
 * of the same kind is freed when the new one is installed.
 *
 * Returns 0 on success, -EINVAL for missing state, a zero count, an unknown
 * type or a count above the limit, -ENOMEM if allocation fails, and -EFAULT
 * if the guest data cannot be read.
 */
static long turnstile_upload_metadata(struct domain *d,
                                      XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct turnstile_domain_state *ts = get_turnstile_state(d);
    struct turnstile_op_upload_metadata req;
    uint64_t *table;
    unsigned int words;
    bool func_table;

    if ( ts == NULL )
        return -EINVAL;

    if ( copy_from_guest(&req, arg, 1) != 0 )
        return -EFAULT;

    if ( req.count == 0 )
        return -EINVAL;

    func_table = (req.metadata_type == TURNSTILE_META_FUNC_ENTRIES);
    if ( !func_table && req.metadata_type != TURNSTILE_META_JUMP_ENTRIES )
        return -EINVAL;

    if ( func_table )
    {
        if ( req.count > TURNSTILE_MAX_FUNC_ENTRIES )
            return -EINVAL;
        words = req.count;
    }
    else
    {
        if ( req.count > TURNSTILE_MAX_JUMP_ENTRIES )
            return -EINVAL;
        /* Each jump record occupies three 64-bit words. */
        words = req.count * 3;
    }

    table = xmalloc_array(uint64_t, words);
    if ( table == NULL )
        return -ENOMEM;

    if ( raw_copy_from_guest(table, (void *)(unsigned long)req.buffer_ptr,
                             words * sizeof(uint64_t)) )
    {
        xfree(table);
        return -EFAULT;
    }

    /* Swap the table in under the write lock, releasing the old one. */
    write_lock(&ts->lock);
    if ( func_table )
    {
        xfree(ts->func_entries);
        ts->func_entries = table;
        ts->num_func_entries = req.count;
    }
    else
    {
        xfree(ts->jump_entries);
        ts->jump_entries = table;
        ts->num_jump_entries = req.count;
    }
    write_unlock(&ts->lock);

    return 0;
}
/*
 * Hypercall entry point for all turnstile operations.
 *
 * Access rules:
 *   - domid == DOMID_SELF: only TURNSTILE_OP_REQUEST_WRITE is permitted, and
 *     the calling domain must already have turnstile state.
 *   - any other domid: the caller must be the control domain; turnstile
 *     state is created on first use for the target domain.
 * The target must be an HVM domain.
 *
 * Returns the handler's result, or:
 *   -EPERM   caller not allowed to perform this operation,
 *   -ESRCH   no domain with the given id,
 *   -EINVAL  target is not HVM, or a self call without existing state,
 *   -ENOMEM  state could not be allocated,
 *   -ENOSYS  unknown operation.
 *
 * A reference on the target domain is held for the duration of the call.
 */
long do_turnstile_op(unsigned int op, domid_t domid, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct domain *d;
    long ret;
    bool self = false;

    if ( domid == DOMID_SELF )
    {
        if ( op != TURNSTILE_OP_REQUEST_WRITE )
            return -EPERM;

        d = current->domain;
        get_knownalive_domain(d);
        self = true;
    }
    else
    {
        if ( !is_control_domain(current->domain) )
            return -EPERM;

        d = get_domain_by_id(domid);
        if ( !d )
            return -ESRCH;
    }

    if ( !is_hvm_domain(d) )
    {
        ret = -EINVAL;
        goto out;
    }

    if ( !get_turnstile_state(d) )
    {
        /* A guest may not trigger creation of its own state. */
        if ( self )
        {
            ret = -EINVAL;
            goto out;
        }

        /*
         * Serialise lazy creation.  Without this, two concurrent hypercalls
         * for the same target could both observe a NULL state and both call
         * turnstile_domain_init(), leaking one allocation and letting the
         * callers operate on different state objects.  Re-check under the
         * lock so that only the first caller allocates.
         */
        domain_lock(d);
        ret = get_turnstile_state(d) ? 0 : turnstile_domain_init(d);
        domain_unlock(d);

        if ( ret )
            goto out;
    }

    switch ( op )
    {
    case TURNSTILE_OP_SET_POLICY:
        ret = turnstile_set_policy(d, arg);
        break;

    case TURNSTILE_OP_PROTECT_RANGE:
        ret = turnstile_protect_range(d, arg);
        break;

    case TURNSTILE_OP_UNPROTECT_RANGE:
        ret = turnstile_unprotect_range(d, arg);
        break;

    case TURNSTILE_OP_REQUEST_WRITE:
        ret = turnstile_request_write(d, arg);
        break;

    case TURNSTILE_OP_GET_VIOLATIONS:
        ret = turnstile_get_violations(d, arg);
        break;

    case TURNSTILE_OP_GET_STATS:
        ret = turnstile_get_stats(d, arg);
        break;

    case TURNSTILE_OP_UPLOAD_METADATA:
        ret = turnstile_upload_metadata(d, arg);
        break;

    default:
        ret = -ENOSYS;
        break;
    }

 out:
    put_domain(d);
    return ret;
}