1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
open Ast
(*
--# Confidence Intervals for Model Coefficients
--#
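--# Computes confidence intervals for model coefficients. Gaussian models with known residual degrees of freedom use the Student's t distribution; other models (e.g. binomial or poisson GLMs) use a normal approximation.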
--#
--# @name conf_int
--# @param model :: Model The model object (e.g., from lm() or imported).
--# @param level :: Float Confidence level, strictly between 0 and 1 (default 0.95).
--# @return :: DataFrame Columns = `term`, `lower`, `upper`.
--# @example
--# conf_int(model)
--# conf_int(model, 0.99)
--# @family stats
--# @seealso coef, summary
--# @export
*)
(* [conf_int_impl args _env] is the implementation of the `conf_int` builtin.

   Accepted argument shapes:
   - [VDict model]            : level defaults to 0.95
   - [VDict model; VFloat l]  : level is l
   - [VDict model; VInt i]    : level is float_of_int i; this shape parses,
                                but no integer lies strictly between 0 and 1,
                                so it always ends in the range error below.

   The model dict must hold a "_tidy_df" data frame (columns "term",
   "estimate", "std_error" are read from it) and a "_model_data" dict
   (optional keys "family" and "df_residual" are read from it).

   Returns a [VDataFrame] with columns "term", "lower", "upper" and no group
   keys, or the value produced by [Error.type_error] when the arguments, the
   level, or the model shape are invalid. A single [VError] argument is
   returned unchanged. [_env] is unused. *)
let conf_int_impl args _env =
  match args with
  | [VError _ as e] -> e
  | _ ->
    let parsed =
      match args with
      | [VDict p] -> Ok (p, 0.95)
      | [VDict p; VFloat l] -> Ok (p, l)
      | [VDict p; VInt i] -> Ok (p, float_of_int i)
      | _ -> Error (Error.type_error "Function `conf_int` expects model or (model, level).")
    in
    (match parsed with
     | Error e -> e
     | Ok (model_pairs, level) ->
       (* Written as a negated conjunction on purpose: every comparison with
          NaN is false, so the former test (level <= 0.0 || level >= 1.0)
          let a NaN level through and produced NaN bounds. This form rejects
          NaN as well as out-of-range values. *)
       if not (level > 0.0 && level < 1.0) then
         Error.type_error "Function `conf_int` level must be between 0 and 1 (e.g. 0.95)."
       else
         (match List.assoc_opt "_tidy_df" model_pairs, List.assoc_opt "_model_data" model_pairs with
          | Some (VDataFrame tidy), Some (VDict model) ->
            (* A missing or non-string family is treated as gaussian. *)
            let family =
              match List.assoc_opt "family" model with
              | Some (VString f) -> String.lowercase_ascii f
              | _ -> "gaussian"
            in
            (* Float degrees of freedom are truncated toward zero. *)
            let df_opt =
              match List.assoc_opt "df_residual" model with
              | Some (VInt i) -> Some i
              | Some (VFloat f) -> Some (int_of_float f)
              | _ -> None
            in
            (* Use t-distribution only for Gaussian models with known df.
               Otherwise (GLMs like binomial/poisson), use normal approximation. *)
            let use_df = if family = "gaussian" then df_opt else None in
            let alpha = 1.0 -. level in
            (* Two-sided interval: critical value at probability 1 - alpha/2. *)
            let crit = Stats.quantile (1.0 -. alpha /. 2.0) use_df in
            let terms = Arrow_table.get_string_column tidy.arrow_table "term" in
            let ests = Arrow_table.get_float_column tidy.arrow_table "estimate" in
            let ses = Arrow_table.get_float_column tidy.arrow_table "std_error" in
            let n = Array.length terms in
            (* NOTE(review): indexes ests/ses up to the length of terms, so it
               assumes all three columns have the same length -- confirm that
               Arrow_table guarantees this. *)
            (* Builds one bound column: estimate combined with the margin
               crit * std_error. A row missing either input yields None. *)
            let bounds_with combine =
              Array.init n (fun i ->
                match ests.(i), ses.(i) with
                | Some e, Some se -> Some (combine e (crit *. se))
                | _ -> None)
            in
            let lowers = bounds_with ( -. ) in
            let uppers = bounds_with ( +. ) in
            let columns = [
              ("term", Arrow_table.StringColumn terms);
              ("lower", Arrow_table.FloatColumn lowers);
              ("upper", Arrow_table.FloatColumn uppers);
            ] in
            let table = Arrow_table.create columns n in
            VDataFrame { arrow_table = table; group_keys = [] }
          | _ -> Error.type_error "Function `conf_int` expects a model returned by `lm` or `glm`."))
(* [register env] wraps [conf_int_impl] as a variadic builtin named
   "conf_int" and adds it to [env] under that name via [Env.add], returning
   whatever [Env.add] yields.
   NOTE(review): the positional 1 passed to [make_builtin] is presumably the
   minimum argument count -- confirm against its definition. *)
let register env =
  let builtin = make_builtin ~name:"conf_int" ~variadic:true 1 conf_int_impl in
  Env.add "conf_int" builtin env