314 lines
11 KiB
OCaml
314 lines
11 KiB
OCaml
open Ctypes
|
|
open Foreign
|
|
|
|
exception Icepick_error of string
|
|
|
|
type prime_strategy =
|
|
| Prime_temporal
|
|
| Prime_prefetcht2
|
|
| Prime_prefetchnta
|
|
| Prime_nt_store
|
|
|
|
type access_pattern =
|
|
| Pattern_sequential
|
|
| Pattern_reverse
|
|
| Pattern_strided
|
|
| Pattern_pointer_chase
|
|
|
|
type core_type =
|
|
| Core_any
|
|
| Core_pcore
|
|
| Core_ecore
|
|
|
|
let () = Callback.register_exception "Icepick_error" (Icepick_error "")
|
|
|
|
let () =
|
|
let _ = Dl.dlopen ~filename:"/home/oni/Projects/icepick/libicepick.so"
|
|
~flags:[Dl.RTLD_NOW; Dl.RTLD_GLOBAL] in
|
|
()
|
|
|
|
module C = struct
|
|
let icepick_strerror = foreign "icepick_strerror" (int @-> returning string)
|
|
|
|
let check_error ret =
|
|
if ret < 0 then raise (Icepick_error (icepick_strerror ret))
|
|
|
|
module Topology = struct
|
|
type t = unit ptr
|
|
let t : t typ = ptr void
|
|
|
|
let discover = foreign "icepick_discover_topology" (ptr t @-> returning int)
|
|
let free = foreign "icepick_free_topology" (t @-> returning void)
|
|
let l3_ways = foreign "icepick_topology_l3_ways" (t @-> returning uint)
|
|
let l3_size = foreign "icepick_topology_l3_size" (t @-> returning size_t)
|
|
let way_size = foreign "icepick_topology_way_size" (t @-> returning size_t)
|
|
let max_clos = foreign "icepick_topology_max_clos" (t @-> returning uint)
|
|
let ccx_count = foreign "icepick_topology_ccx_count" (t @-> returning uint)
|
|
let cat_supported = foreign "icepick_topology_cat_supported" (t @-> returning bool)
|
|
let mba_supported = foreign "icepick_topology_mba_supported" (t @-> returning bool)
|
|
let mba_is_linear = foreign "icepick_topology_mba_is_linear" (t @-> returning bool)
|
|
let max_mba_thrtl = foreign "icepick_topology_max_mba_thrtl" (t @-> returning uint)
|
|
let is_hybrid = foreign "icepick_topology_is_hybrid" (t @-> returning bool)
|
|
let pcore_count = foreign "icepick_topology_pcore_count" (t @-> returning uint)
|
|
let ecore_count = foreign "icepick_topology_ecore_count" (t @-> returning uint)
|
|
end
|
|
|
|
module Region = struct
|
|
type t = unit ptr
|
|
let t : t typ = ptr void
|
|
|
|
let ptr = foreign "icepick_region_ptr" (t @-> returning (ptr void))
|
|
let size = foreign "icepick_region_size" (t @-> returning size_t)
|
|
let clos = foreign "icepick_region_clos" (t @-> returning uint)
|
|
end
|
|
|
|
module Config = struct
|
|
type t
|
|
let t : t structure typ = structure "icepick_config_t"
|
|
let size = field t "size" size_t
|
|
let clos_id = field t "clos_id" uint
|
|
let numa_node = field t "numa_node" int
|
|
let huge_pages = field t "huge_pages" bool
|
|
let verify = field t "verify" bool
|
|
let auto_monitor = field t "auto_monitor" bool
|
|
let pmu_poll_interval_ns = field t "pmu_poll_interval_ns" uint64_t
|
|
let probe_interval_ns = field t "probe_interval_ns" uint64_t
|
|
let miss_threshold = field t "miss_threshold" uint32_t
|
|
let prime_strategy = field t "prime_strategy" int
|
|
let access_pattern = field t "access_pattern" int
|
|
let stride_bytes = field t "stride_bytes" size_t
|
|
let prime_iterations = field t "prime_iterations" uint
|
|
let mba_throttle = field t "mba_throttle" uint
|
|
let core_type = field t "core_type" int
|
|
let () = seal t
|
|
end
|
|
|
|
module Latency_stats = struct
|
|
type t
|
|
let t : t structure typ = structure "icepick_latency_stats_t"
|
|
let mean_ns = field t "mean_ns" uint64_t
|
|
let stddev_ns = field t "stddev_ns" uint64_t
|
|
let p50_ns = field t "p50_ns" uint64_t
|
|
let p99_ns = field t "p99_ns" uint64_t
|
|
let p999_ns = field t "p999_ns" uint64_t
|
|
let min_ns = field t "min_ns" uint64_t
|
|
let max_ns = field t "max_ns" uint64_t
|
|
let () = seal t
|
|
end
|
|
|
|
let lock = foreign "icepick_lock"
|
|
(Topology.t @-> ptr Config.t @-> ptr Region.t @-> returning int)
|
|
|
|
let unlock = foreign "icepick_unlock" (Region.t @-> returning int)
|
|
|
|
let verify = foreign "icepick_verify"
|
|
(Region.t @-> ptr Latency_stats.t @-> returning int)
|
|
|
|
let bench = foreign "icepick_bench"
|
|
(ptr void @-> size_t @-> size_t @-> ptr Latency_stats.t @-> returning int)
|
|
|
|
module Monitor = struct
|
|
type t = unit ptr
|
|
let t : t typ = ptr void
|
|
|
|
let start = foreign "icepick_monitor_start" (Region.t @-> ptr t @-> returning int)
|
|
let start_ex = foreign "icepick_monitor_start_ex"
|
|
(Region.t @-> uint64_t @-> uint64_t @-> uint32_t @-> ptr t @-> returning int)
|
|
let stop = foreign "icepick_monitor_stop" (t @-> returning int)
|
|
let eviction_count = foreign "icepick_monitor_eviction_count" (t @-> returning uint64_t)
|
|
let reprime_count = foreign "icepick_monitor_reprime_count" (t @-> returning uint64_t)
|
|
let is_degraded = foreign "icepick_monitor_is_degraded" (t @-> returning bool)
|
|
end
|
|
end
|
|
|
|
module Topology = struct
|
|
type t = {
|
|
ptr : C.Topology.t;
|
|
mutable released : bool;
|
|
}
|
|
|
|
let release t =
|
|
if not t.released then begin
|
|
C.Topology.free t.ptr;
|
|
t.released <- true
|
|
end
|
|
|
|
let discover () =
|
|
let ptr_ref = allocate C.Topology.t (from_voidp void null) in
|
|
let ret = C.Topology.discover ptr_ref in
|
|
C.check_error ret;
|
|
let t = { ptr = !@ ptr_ref; released = false } in
|
|
Gc.finalise release t;
|
|
t
|
|
|
|
let l3_ways t = Unsigned.UInt.to_int (C.Topology.l3_ways t.ptr)
|
|
let l3_size t = Unsigned.Size_t.to_int (C.Topology.l3_size t.ptr)
|
|
let way_size t = Unsigned.Size_t.to_int (C.Topology.way_size t.ptr)
|
|
let max_clos t = Unsigned.UInt.to_int (C.Topology.max_clos t.ptr)
|
|
let ccx_count t = Unsigned.UInt.to_int (C.Topology.ccx_count t.ptr)
|
|
let cat_supported t = C.Topology.cat_supported t.ptr
|
|
let mba_supported t = C.Topology.mba_supported t.ptr
|
|
let mba_is_linear t = C.Topology.mba_is_linear t.ptr
|
|
let max_mba_thrtl t = Unsigned.UInt.to_int (C.Topology.max_mba_thrtl t.ptr)
|
|
let is_hybrid t = C.Topology.is_hybrid t.ptr
|
|
let pcore_count t = Unsigned.UInt.to_int (C.Topology.pcore_count t.ptr)
|
|
let ecore_count t = Unsigned.UInt.to_int (C.Topology.ecore_count t.ptr)
|
|
end
|
|
|
|
module Latency_stats = struct
|
|
type t = {
|
|
mean_ns : int;
|
|
stddev_ns : int;
|
|
p50_ns : int;
|
|
p99_ns : int;
|
|
p999_ns : int;
|
|
min_ns : int;
|
|
max_ns : int;
|
|
}
|
|
|
|
let of_c stats =
|
|
{
|
|
mean_ns = Unsigned.UInt64.to_int (getf stats C.Latency_stats.mean_ns);
|
|
stddev_ns = Unsigned.UInt64.to_int (getf stats C.Latency_stats.stddev_ns);
|
|
p50_ns = Unsigned.UInt64.to_int (getf stats C.Latency_stats.p50_ns);
|
|
p99_ns = Unsigned.UInt64.to_int (getf stats C.Latency_stats.p99_ns);
|
|
p999_ns = Unsigned.UInt64.to_int (getf stats C.Latency_stats.p999_ns);
|
|
min_ns = Unsigned.UInt64.to_int (getf stats C.Latency_stats.min_ns);
|
|
max_ns = Unsigned.UInt64.to_int (getf stats C.Latency_stats.max_ns);
|
|
}
|
|
end
|
|
|
|
module Region = struct
|
|
type t = {
|
|
ptr : C.Region.t;
|
|
topo : Topology.t;
|
|
mutable released : bool;
|
|
}
|
|
|
|
let release t =
|
|
if not t.released then begin
|
|
let _ = C.unlock t.ptr in
|
|
t.released <- true
|
|
end
|
|
|
|
let prime_strategy_to_int = function
|
|
| Prime_temporal -> 0
|
|
| Prime_prefetcht2 -> 1
|
|
| Prime_prefetchnta -> 2
|
|
| Prime_nt_store -> 3
|
|
|
|
let access_pattern_to_int = function
|
|
| Pattern_sequential -> 0
|
|
| Pattern_reverse -> 1
|
|
| Pattern_strided -> 2
|
|
| Pattern_pointer_chase -> 3
|
|
|
|
let core_type_to_int = function
|
|
| Core_any -> 0
|
|
| Core_pcore -> 1
|
|
| Core_ecore -> 2
|
|
|
|
let lock topo ~size ~clos ?(numa = -1) ?(huge_pages = true) ?(verify = false)
|
|
?(prime_strategy = Prime_temporal) ?(access_pattern = Pattern_sequential)
|
|
?(stride = 0) ?(prime_iterations = 3) ?(mba_throttle = 0)
|
|
?(core_type = Core_any) () =
|
|
let cfg = make C.Config.t in
|
|
setf cfg C.Config.size (Unsigned.Size_t.of_int size);
|
|
setf cfg C.Config.clos_id (Unsigned.UInt.of_int clos);
|
|
setf cfg C.Config.numa_node numa;
|
|
setf cfg C.Config.huge_pages huge_pages;
|
|
setf cfg C.Config.verify verify;
|
|
setf cfg C.Config.auto_monitor false;
|
|
setf cfg C.Config.pmu_poll_interval_ns (Unsigned.UInt64.of_int 0);
|
|
setf cfg C.Config.probe_interval_ns (Unsigned.UInt64.of_int 0);
|
|
setf cfg C.Config.miss_threshold (Unsigned.UInt32.of_int 0);
|
|
setf cfg C.Config.prime_strategy (prime_strategy_to_int prime_strategy);
|
|
setf cfg C.Config.access_pattern (access_pattern_to_int access_pattern);
|
|
setf cfg C.Config.stride_bytes (Unsigned.Size_t.of_int stride);
|
|
setf cfg C.Config.prime_iterations (Unsigned.UInt.of_int prime_iterations);
|
|
setf cfg C.Config.mba_throttle (Unsigned.UInt.of_int mba_throttle);
|
|
setf cfg C.Config.core_type (core_type_to_int core_type);
|
|
let ptr_ref = allocate C.Region.t (from_voidp void null) in
|
|
let ret = C.lock topo.Topology.ptr (addr cfg) ptr_ref in
|
|
C.check_error ret;
|
|
let t = { ptr = !@ ptr_ref; topo; released = false } in
|
|
Gc.finalise release t;
|
|
t
|
|
|
|
let unlock t =
|
|
if not t.released then begin
|
|
let ret = C.unlock t.ptr in
|
|
t.released <- true;
|
|
C.check_error ret
|
|
end
|
|
|
|
let ptr t = raw_address_of_ptr (C.Region.ptr t.ptr)
|
|
let size t = Unsigned.Size_t.to_int (C.Region.size t.ptr)
|
|
let clos t = Unsigned.UInt.to_int (C.Region.clos t.ptr)
|
|
|
|
let to_bigarray_float64 t =
|
|
let region_ptr = C.Region.ptr t.ptr in
|
|
let region_size = Unsigned.Size_t.to_int (C.Region.size t.ptr) in
|
|
let len = region_size / 8 in
|
|
let typed_ptr = from_voidp double (to_voidp region_ptr) in
|
|
CArray.from_ptr typed_ptr len |> CArray.to_list |> Array.of_list |>
|
|
Bigarray.Array1.of_array Bigarray.Float64 Bigarray.c_layout
|
|
end
|
|
|
|
module Monitor = struct
|
|
type t = {
|
|
ptr : C.Monitor.t;
|
|
region : Region.t;
|
|
mutable stopped : bool;
|
|
}
|
|
|
|
let release t =
|
|
if not t.stopped then begin
|
|
let _ = C.Monitor.stop t.ptr in
|
|
t.stopped <- true
|
|
end
|
|
|
|
let start region =
|
|
let ptr_ref = allocate C.Monitor.t (from_voidp void null) in
|
|
let ret = C.Monitor.start region.Region.ptr ptr_ref in
|
|
C.check_error ret;
|
|
let t = { ptr = !@ ptr_ref; region; stopped = false } in
|
|
Gc.finalise release t;
|
|
t
|
|
|
|
let start_ex region ~pmu_poll_ns ~probe_ns ~miss_thresh =
|
|
let ptr_ref = allocate C.Monitor.t (from_voidp void null) in
|
|
let ret = C.Monitor.start_ex region.Region.ptr
|
|
(Unsigned.UInt64.of_int pmu_poll_ns)
|
|
(Unsigned.UInt64.of_int probe_ns)
|
|
(Unsigned.UInt32.of_int miss_thresh)
|
|
ptr_ref in
|
|
C.check_error ret;
|
|
let t = { ptr = !@ ptr_ref; region; stopped = false } in
|
|
Gc.finalise release t;
|
|
t
|
|
|
|
let stop t =
|
|
if not t.stopped then begin
|
|
let ret = C.Monitor.stop t.ptr in
|
|
t.stopped <- true;
|
|
C.check_error ret
|
|
end
|
|
|
|
let eviction_count t = Unsigned.UInt64.to_int (C.Monitor.eviction_count t.ptr)
|
|
let reprime_count t = Unsigned.UInt64.to_int (C.Monitor.reprime_count t.ptr)
|
|
let is_degraded t = C.Monitor.is_degraded t.ptr
|
|
end
|
|
|
|
let verify region =
|
|
let stats = make C.Latency_stats.t in
|
|
let ret = C.verify region.Region.ptr (addr stats) in
|
|
C.check_error ret;
|
|
Latency_stats.of_c stats
|
|
|
|
let bench ptr ~size ~iterations =
|
|
let stats = make C.Latency_stats.t in
|
|
let p = ptr_of_raw_address ptr in
|
|
let ret = C.bench p (Unsigned.Size_t.of_int size) (Unsigned.Size_t.of_int iterations) (addr stats) in
|
|
C.check_error ret;
|
|
Latency_stats.of_c stats
|