1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
open Ast
(*
--# Standard Deviation
--#
--# Calculates the sample standard deviation of a numeric vector.
--# With `weights`, uses the weighted population denominator (`sum(weights)`).
--#
--# @name sd
--# @param x :: Vector | List The numeric data.
--# @param na_rm :: Bool (Optional) logical. Should missing values be removed? Default is false.
--# @param weights :: Vector[Float] | List[Float] = NA Optional non-negative observation weights.
--# @return :: Float The standard deviation.
--# @example
--# sd([1, 2, 3, 4, 5])
--# -- Returns = 1.5811...
--# @family stats
--# @seealso mean, var
--# @export
*)
let register env =
Env.add "sd"
(make_builtin_named ~name:"sd" ~variadic:true 1 (fun named_args _env ->
match Math_common.get_bool_flag "na_rm" false named_args with
| Error e -> e
| Ok na_rm ->
let args = Math_common.positional_args_without ["na_rm"; "weights"] named_args in
let weight_arg = Math_common.optional_named_arg "weights" named_args in
let extract_nums_arr label arr =
let len = Array.length arr in
let had_error = ref None in
let result = Array.make len 0.0 in
for i = 0 to len - 1 do
if !had_error = None then
match arr.(i) with
| VInt n -> result.(i) <- float_of_int n
| VFloat f -> result.(i) <- f
| VNA _ -> had_error := Some (Error.na_value_error ~na_rm:true label)
| _ -> had_error := Some (Error.type_error (Printf.sprintf "Function `%s` requires numeric values." label))
done;
match !had_error with Some e -> Error e | None -> Ok result
in
let extract_nums_arr_na_rm label arr =
let nums = ref [] in
let had_error = ref None in
for i = 0 to Array.length arr - 1 do
if !had_error = None then
match arr.(i) with
| VInt n -> nums := float_of_int n :: !nums
| VFloat f -> nums := f :: !nums
| VNA _ -> ()
| _ -> had_error := Some (Error.type_error (Printf.sprintf "Function `%s` requires numeric values." label))
done;
match !had_error with Some e -> Error e | None -> Ok (Array.of_list (List.rev !nums))
in
let compute_sd nums n =
if n < 2 then Error.value_error "Function `sd` requires at least 2 values."
else
let mean = Array.fold_left ( +. ) 0.0 nums /. float_of_int n in
let sum_sq = Array.fold_left (fun acc x -> acc +. (x -. mean) *. (x -. mean)) 0.0 nums in
VFloat (Float.sqrt (sum_sq /. float_of_int (n - 1)))
in
let first_arg = match args with a :: _ -> Some a | [] -> None in
(match first_arg with
| Some (VList items) ->
(match weight_arg with
| Some weight_v ->
(match Math_utils.extract_numeric_array_with_weights ~label:"sd" ~na_rm (VList items) weight_v with
| Error e -> e
| Ok (xs, ws) ->
if Array.length xs < 2 then Error.value_error "Function `sd` requires at least 2 values."
else
(match Math_utils.weighted_variance_population xs ws with
| Some v -> VFloat (Float.sqrt v)
| None -> Error.make_error RuntimeError "Function `sd` internal error: weighted variance could not be computed."))
| None ->
let arr = Array.of_list (List.map snd items) in
if na_rm then
(match extract_nums_arr_na_rm "sd" arr with
| Error e -> e
| Ok nums when Array.length nums = 0 -> VNA NAFloat
| Ok nums -> compute_sd nums (Array.length nums))
else
(match extract_nums_arr "sd" arr with
| Error e -> e
| Ok nums -> compute_sd nums (Array.length nums)))
| Some (VVector arr) ->
(match weight_arg with
| Some weight_v ->
(match Math_utils.extract_numeric_array_with_weights ~label:"sd" ~na_rm (VVector arr) weight_v with
| Error e -> e
| Ok (xs, ws) ->
if Array.length xs < 2 then Error.value_error "Function `sd` requires at least 2 values."
else
(match Math_utils.weighted_variance_population xs ws with
| Some v -> VFloat (Float.sqrt v)
| None -> Error.make_error RuntimeError "Function `sd` internal error: weighted variance could not be computed."))
| None ->
if na_rm then
(match extract_nums_arr_na_rm "sd" arr with
| Error e -> e
| Ok nums when Array.length nums = 0 -> VNA NAFloat
| Ok nums -> compute_sd nums (Array.length nums))
else
(match extract_nums_arr "sd" arr with
| Error e -> e
| Ok nums -> compute_sd nums (Array.length nums)))
| Some (VNA _) -> Error.na_value_error ~na_rm:true "sd"
| Some _ -> Error.type_error "Function `sd` expects a numeric List or Vector."
| None -> Error.arity_error_named "sd" 1 (List.length args))
))
env