1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
(* src/packages/stats/sigma.ml *)
open Ast
(** sigma(model) — returns the residual standard deviation (sigma) of a linear model. *)
(*
--# Residual Standard Deviation
--#
--# Returns the residual standard deviation (sigma) of a linear model.
--# For GLMs, use `dispersion()` instead.
--#
--# @name sigma
--# @param model :: Model The model object.
--# @return :: Float The Residual Standard Error.
--#
--# @details
--# Sigma ($\hat{\sigma}$) represents the Residual Standard Error (RSE), which is an estimate
--# of the standard deviation of the error term $\epsilon$. It measures the "average"
--# distance that the observed values fall from the regression line.
--#
--# For OLS, it is calculated as $\hat{\sigma} = \sqrt{\frac{\sum r_i^2}{n - p}}$, where
--# $n$ is the number of observations and $p$ is the number of estimated parameters.
--#
--# @example
--# model = lm(mpg ~ wt, data = mtcars)
--# s = sigma(model)
--# @family stats
--# @seealso dispersion
--# @export
*)
(** [register env] installs the [sigma] builtin into [env] through [Env.add]
    and yields whatever [Env.add] returns (presumably the extended
    environment -- confirm against [Env]).

    The builtin is created with [make_builtin ~name:"sigma" 1]; the [1] is
    presumably the arity. Its behaviour on the argument list:
    - a single [VDict]: look up the key "sigma", reading from the nested
      "_model_data" dict when that key holds a [VDict], otherwise from the
      dict itself. A [VFloat] is returned unchanged, a [VInt] is converted
      to [VFloat]. Anything else produces a type error, with a dedicated
      message when a "dispersion" key is present;
    - a single [VError]: returned as is;
    - any other argument list: type error. *)
let register env =
  (* Pick the association list that actually carries the model fields. *)
  let model_fields entries =
    match List.assoc_opt "_model_data" entries with
    | Some (VDict nested) -> nested
    | _ -> entries
  in
  (* Error raised when no usable "sigma" entry exists; a "dispersion" entry
     selects the message that points the caller at dispersion(). *)
  let missing_sigma fields =
    if List.mem_assoc "dispersion" fields then
      Error.type_error "Function `sigma` not applicable for this model. Use `dispersion()` instead."
    else
      Error.type_error "Function `sigma` could not find 'sigma' in model object."
  in
  let sigma_of_model entries =
    let fields = model_fields entries in
    match List.assoc_opt "sigma" fields with
    | Some (VFloat x) -> VFloat x
    | Some (VInt n) -> VFloat (float_of_int n)
    | _ -> missing_sigma fields
  in
  let builtin_body args _env =
    match args with
    | [VDict entries] -> sigma_of_model entries
    | [VError _ as err] -> err
    | _ -> Error.type_error "Function `sigma` expects a model (Dict)."
  in
  Env.add "sigma" (make_builtin ~name:"sigma" 1 builtin_body) env