#include #include #include #include #include #include #include #include #include static struct turnstile_domain_state *get_turnstile_state(struct domain *d) { return d->arch.turnstile; } int turnstile_domain_init(struct domain *d) { struct turnstile_domain_state *state; state = xzalloc(struct turnstile_domain_state); if ( !state ) return -ENOMEM; rwlock_init(&state->lock); state->policy = TURNSTILE_POLICY_DISABLED; state->ring_head = 0; state->ring_tail = 0; state->ring_seq = 0; state->overflow_count = 0; state->num_ranges = 0; state->func_entries = NULL; state->num_func_entries = 0; state->jump_entries = NULL; state->num_jump_entries = 0; state->write_grant.active = false; d->arch.turnstile = state; return 0; } void turnstile_domain_destroy(struct domain *d) { struct turnstile_domain_state *state = get_turnstile_state(d); if ( !state ) return; xfree(state->func_entries); xfree(state->jump_entries); xfree(state); d->arch.turnstile = NULL; } static bool gpa_in_range(struct turnstile_domain_state *state, uint64_t gpa) { unsigned int i; for ( i = 0; i < state->num_ranges; i++ ) { if ( state->ranges[i].active && gpa >= state->ranges[i].gpa_start && gpa < state->ranges[i].gpa_end ) return true; } return false; } static bool check_write_grant(struct turnstile_domain_state *state, uint64_t gpa, bool *grant_expired) { *grant_expired = false; if ( !state->write_grant.active ) return false; if ( NOW() > state->write_grant.expiry ) { *grant_expired = true; return false; } return gpa >= state->write_grant.gpa_start && gpa < state->write_grant.gpa_end; } static int check_implicit_ftrace(struct turnstile_domain_state *state, uint64_t gpa, const uint8_t *insn_bytes) { unsigned int i; uint64_t vaddr = gpa + 0xffff800000000000ULL; for ( i = 0; i < state->num_func_entries; i++ ) { if ( state->func_entries[i] == vaddr ) { if ( insn_bytes[0] == 0xe8 ) return TURNSTILE_IMPLICIT_FTRACE; if ( insn_bytes[0] == 0x0f && insn_bytes[1] == 0x1f && insn_bytes[2] == 0x44 && insn_bytes[3] == 0x00 && insn_bytes[4] == 0x00 ) return TURNSTILE_IMPLICIT_FTRACE; if ( insn_bytes[0] == 0x90 || (insn_bytes[0] == 0x66 && insn_bytes[1] == 0x90) ) return TURNSTILE_IMPLICIT_FTRACE; } } return TURNSTILE_IMPLICIT_DENIED; } static int check_implicit_static_key(struct turnstile_domain_state *state, uint64_t gpa, const uint8_t *insn_bytes) { unsigned int i; uint64_t vaddr = gpa + 0xffff800000000000ULL; for ( i = 0; i < state->num_jump_entries; i++ ) { uint64_t code_addr = state->jump_entries[i * 3]; if ( code_addr == vaddr ) { if ( insn_bytes[0] == 0x0f && insn_bytes[1] == 0x1f ) return TURNSTILE_IMPLICIT_STATIC; if ( insn_bytes[0] == 0xe9 ) return TURNSTILE_IMPLICIT_STATIC; if ( insn_bytes[0] == 0xeb ) return TURNSTILE_IMPLICIT_STATIC; if ( insn_bytes[0] == 0x90 ) return TURNSTILE_IMPLICIT_STATIC; if ( insn_bytes[0] == 0x66 && insn_bytes[1] == 0x90 ) return TURNSTILE_IMPLICIT_STATIC; } } return TURNSTILE_IMPLICIT_DENIED; } static void log_violation(struct turnstile_domain_state *state, uint64_t gpa, uint64_t rip, uint64_t cr3, uint32_t access_flags, uint32_t response, const uint8_t *insn_bytes) { struct turnstile_violation *v; uint32_t head; uint32_t next_head; head = state->ring_head; next_head = (head + 1) % TURNSTILE_RING_SIZE; if ( next_head == state->ring_tail ) { state->overflow_count++; state->stats.ring_overflows++; state->ring_tail = (state->ring_tail + 1) % TURNSTILE_RING_SIZE; } v = &state->ring[head]; v->timestamp = NOW(); v->gpa = gpa; v->rip = rip; v->cr3 = cr3; v->access_flags = access_flags; v->response = response; v->seq = state->ring_seq++; memcpy(v->insn_bytes, insn_bytes, 16); state->ring_head = next_head; state->stats.violations_total++; if ( response ) state->stats.violations_blocked++; else state->stats.violations_allowed++; } int turnstile_check_violation(struct domain *d, uint64_t gpa, uint64_t rip, uint64_t cr3, uint32_t access_flags, const uint8_t *insn_bytes) { struct turnstile_domain_state *state = get_turnstile_state(d); int implicit; int block = 0; bool in_range; bool grant_valid; bool grant_expired; uint32_t policy; if ( !state || state->policy == TURNSTILE_POLICY_DISABLED ) return 0; read_lock(&state->lock); in_range = gpa_in_range(state, gpa); if ( !in_range ) { read_unlock(&state->lock); return 0; } grant_valid = check_write_grant(state, gpa, &grant_expired); if ( grant_valid ) { read_unlock(&state->lock); write_lock(&state->lock); state->stats.explicit_write_grants++; write_unlock(&state->lock); return 0; } implicit = check_implicit_ftrace(state, gpa, insn_bytes); if ( implicit == TURNSTILE_IMPLICIT_FTRACE ) { read_unlock(&state->lock); write_lock(&state->lock); state->stats.implicit_ftrace++; write_unlock(&state->lock); return 0; } implicit = check_implicit_static_key(state, gpa, insn_bytes); if ( implicit == TURNSTILE_IMPLICIT_STATIC ) { read_unlock(&state->lock); write_lock(&state->lock); state->stats.implicit_static_key++; write_unlock(&state->lock); return 0; } policy = state->policy; read_unlock(&state->lock); if ( policy == TURNSTILE_POLICY_ENFORCE ) block = 1; write_lock(&state->lock); if ( grant_expired ) state->write_grant.active = false; log_violation(state, gpa, rip, cr3, access_flags, block, insn_bytes); write_unlock(&state->lock); return block; } static long turnstile_set_policy(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) arg) { struct turnstile_domain_state *state = get_turnstile_state(d); struct turnstile_op_set_policy op; if ( !state ) return -EINVAL; if ( copy_from_guest(&op, arg, 1) ) return -EFAULT; if ( op.policy > TURNSTILE_POLICY_ENFORCE ) return -EINVAL; write_lock(&state->lock); state->policy = op.policy; write_unlock(&state->lock); return 0; } static long turnstile_protect_range(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) arg) { struct turnstile_domain_state *state = get_turnstile_state(d); struct turnstile_op_protect_range op; unsigned int i; int slot = -1; if ( !state ) return -EINVAL; if ( copy_from_guest(&op, arg, 1) ) return -EFAULT; if ( op.length == 0 ) return -EINVAL; write_lock(&state->lock); for ( i = 0; i < state->num_ranges; i++ ) { if ( state->ranges[i].active && state->ranges[i].gpa_start == op.gpa_start && state->ranges[i].gpa_end == op.gpa_start + op.length ) { write_unlock(&state->lock); return 0; } } for ( i = 0; i < TURNSTILE_MAX_RANGES; i++ ) { if ( !state->ranges[i].active ) { slot = i; break; } } if ( slot < 0 ) { write_unlock(&state->lock); return -ENOSPC; } state->ranges[slot].gpa_start = op.gpa_start; state->ranges[slot].gpa_end = op.gpa_start + op.length; state->ranges[slot].active = true; state->ranges[slot].original_type = p2m_ram_rw; if ( (unsigned int)slot >= state->num_ranges ) state->num_ranges = slot + 1; write_unlock(&state->lock); return 0; } static long turnstile_unprotect_range(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) arg) { struct turnstile_domain_state *state = get_turnstile_state(d); struct turnstile_op_protect_range op; unsigned int i; bool found = false; if ( !state ) return -EINVAL; if ( copy_from_guest(&op, arg, 1) ) return -EFAULT; write_lock(&state->lock); for ( i = 0; i < state->num_ranges; i++ ) { if ( state->ranges[i].active && state->ranges[i].gpa_start == op.gpa_start ) { state->ranges[i].active = false; found = true; break; } } write_unlock(&state->lock); return found ? 0 : -ENOENT; } static long turnstile_request_write(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) arg) { struct turnstile_domain_state *state = get_turnstile_state(d); struct turnstile_op_request_write op; if ( !state ) return -EINVAL; if ( copy_from_guest(&op, arg, 1) ) return -EFAULT; if ( op.timeout_ms > 1000 ) return -EINVAL; if ( op.length == 0 ) return -EINVAL; write_lock(&state->lock); if ( !gpa_in_range(state, op.gpa_start) ) { write_unlock(&state->lock); return -ENOENT; } state->write_grant.gpa_start = op.gpa_start; state->write_grant.gpa_end = op.gpa_start + op.length; state->write_grant.expiry = NOW() + MILLISECS(op.timeout_ms); state->write_grant.active = true; write_unlock(&state->lock); return 0; } static long turnstile_get_violations(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) arg) { struct turnstile_domain_state *state = get_turnstile_state(d); struct turnstile_op_get_violations op; void *buf_ptr; uint32_t count = 0; if ( !state ) return -EINVAL; if ( copy_from_guest(&op, arg, 1) ) return -EFAULT; if ( op.max_entries == 0 ) return -EINVAL; buf_ptr = (void *)(unsigned long)op.buffer_ptr; write_lock(&state->lock); while ( state->ring_tail != state->ring_head && count < op.max_entries ) { struct turnstile_violation *v = &state->ring[state->ring_tail]; void *dest = buf_ptr + (count * sizeof(struct turnstile_violation)); if ( raw_copy_to_guest(dest, v, sizeof(struct turnstile_violation)) ) { write_unlock(&state->lock); return -EFAULT; } state->ring_tail = (state->ring_tail + 1) % TURNSTILE_RING_SIZE; count++; } op.entries_returned = count; op.overflow_count = state->overflow_count; state->overflow_count = 0; write_unlock(&state->lock); if ( copy_to_guest(arg, &op, 1) ) return -EFAULT; return 0; } static long turnstile_get_stats(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) arg) { struct turnstile_domain_state *state = get_turnstile_state(d); struct turnstile_stats stats; if ( !state ) return -EINVAL; read_lock(&state->lock); stats = state->stats; read_unlock(&state->lock); if ( copy_to_guest(arg, &stats, 1) ) return -EFAULT; return 0; } static long turnstile_upload_metadata(struct domain *d, XEN_GUEST_HANDLE_PARAM(void) arg) { struct turnstile_domain_state *state = get_turnstile_state(d); struct turnstile_op_upload_metadata op; void *buf_ptr; uint64_t *new_entries; unsigned int alloc_count; if ( !state ) return -EINVAL; if ( copy_from_guest(&op, arg, 1) ) return -EFAULT; if ( op.count == 0 ) return -EINVAL; if ( op.metadata_type == TURNSTILE_META_FUNC_ENTRIES ) { if ( op.count > TURNSTILE_MAX_FUNC_ENTRIES ) return -EINVAL; alloc_count = op.count; } else if ( op.metadata_type == TURNSTILE_META_JUMP_ENTRIES ) { if ( op.count > TURNSTILE_MAX_JUMP_ENTRIES ) return -EINVAL; alloc_count = op.count * 3; } else { return -EINVAL; } new_entries = xmalloc_array(uint64_t, alloc_count); if ( !new_entries ) return -ENOMEM; buf_ptr = (void *)(unsigned long)op.buffer_ptr; if ( raw_copy_from_guest(new_entries, buf_ptr, alloc_count * sizeof(uint64_t)) ) { xfree(new_entries); return -EFAULT; } write_lock(&state->lock); if ( op.metadata_type == TURNSTILE_META_FUNC_ENTRIES ) { xfree(state->func_entries); state->func_entries = new_entries; state->num_func_entries = op.count; } else { xfree(state->jump_entries); state->jump_entries = new_entries; state->num_jump_entries = op.count; } write_unlock(&state->lock); return 0; } long do_turnstile_op(unsigned int op, domid_t domid, XEN_GUEST_HANDLE_PARAM(void) arg) { struct domain *d; long ret; bool self = false; if ( domid == DOMID_SELF ) { if ( op != TURNSTILE_OP_REQUEST_WRITE ) return -EPERM; d = current->domain; get_knownalive_domain(d); self = true; } else { if ( !is_control_domain(current->domain) ) return -EPERM; d = get_domain_by_id(domid); if ( !d ) return -ESRCH; } if ( !is_hvm_domain(d) ) { put_domain(d); return -EINVAL; } if ( !get_turnstile_state(d) ) { if ( self ) { put_domain(d); return -EINVAL; } ret = turnstile_domain_init(d); if ( ret ) { put_domain(d); return ret; } } switch ( op ) { case TURNSTILE_OP_SET_POLICY: ret = turnstile_set_policy(d, arg); break; case TURNSTILE_OP_PROTECT_RANGE: ret = turnstile_protect_range(d, arg); break; case TURNSTILE_OP_UNPROTECT_RANGE: ret = turnstile_unprotect_range(d, arg); break; case TURNSTILE_OP_REQUEST_WRITE: ret = turnstile_request_write(d, arg); break; case TURNSTILE_OP_GET_VIOLATIONS: ret = turnstile_get_violations(d, arg); break; case TURNSTILE_OP_GET_STATS: ret = turnstile_get_stats(d, arg); break; case TURNSTILE_OP_UPLOAD_METADATA: ret = turnstile_upload_metadata(d, arg); break; default: ret = -ENOSYS; } put_domain(d); return ret; }