1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
open Ast

(*
--# Compute arithmetic mean of numeric values
--#
--# The mean is the sum of values divided by the count. This function
--# handles NA values explicitly through the na_rm parameter.
--#
--# @name mean
--# @param x :: Vector[Float] | List[Float] Input numeric data. Must contain at least one value.
--# @param na_rm :: Bool = false Remove NA values before computation.
--# @param weights :: Vector[Float] | List[Float] = NA Optional non-negative observation weights.
--# @return :: Float | NA The arithmetic mean; NA if na_rm is true and no non-NA values remain. Input containing NA with na_rm = false produces an NA-value error.
--# @example
--#   mean([1, 2, 3])
--#   -- Returns = 2.0
--#
--#   mean([1, NA, 3], na_rm = true)
--#   -- Returns = 2.0
--#
--# @seealso median, sd, sum
--# @family descriptive-statistics
--# @intent
--#   purpose = "Compute central tendency of numeric data"
--#   use_when = "Summarizing distributions or comparing groups"
--#   alternatives = "Use median() for robust center; sd() for spread"
--# @export
*)
let register env =
  (* Adds the `mean` builtin to [env] via [Env.add] and returns the result.

     The builtin inspects only its first positional argument (after the
     named arguments `na_rm` and `weights` have been removed):
     - an empty List or empty Vector yields a value error;
     - a List or Vector with `weights` is delegated to
       [Math_utils.extract_numeric_array_with_weights] and
       [Math_utils.weighted_mean_array];
     - a List or Vector without `weights` is averaged here in one pass;
     - a bare NA yields an NA-value error, any other value a type error;
     - no positional argument at all yields an arity error. *)
  let label = "mean" in
  (* Folds one element into the running [(sum, count)] pair.  Ints are
     widened to float; NA is skipped when [na_rm] is set and is an error
     otherwise; every other constructor is a type error. *)
  let accumulate ~na_rm (sum, count) = function
    | VInt n -> Ok (sum +. float_of_int n, count + 1)
    | VFloat f -> Ok (sum +. f, count + 1)
    | VNA _ when na_rm -> Ok (sum, count)
    | VNA _ -> Error (Error.na_value_error ~na_rm:true label)
    | _ ->
      Error
        (Error.type_error
           (Printf.sprintf "Function `%s` requires numeric values." label))
  in
  (* Left-to-right fold over List items (pairs whose second component is
     the value), stopping at the first offending element. *)
  let rec fold_items ~na_rm acc = function
    | [] -> Ok acc
    | (_, v) :: rest ->
      (match accumulate ~na_rm acc v with
       | Ok acc -> fold_items ~na_rm acc rest
       | Error _ as failure -> failure)
  in
  (* Same fold over a Vector payload, by index, with the same early exit. *)
  let fold_array ~na_rm arr =
    let len = Array.length arr in
    let rec loop i acc =
      if i >= len then Ok acc
      else
        match accumulate ~na_rm acc arr.(i) with
        | Ok acc -> loop (i + 1) acc
        | Error _ as failure -> failure
    in
    loop 0 (0.0, 0)
  in
  (* Turns a fold outcome into the builtin result.  A zero count can only
     happen when every element was a removed NA, in which case the mean is
     NA rather than a division by zero. *)
  let finish = function
    | Error e -> e
    | Ok (_, 0) -> VNA NAFloat
    | Ok (sum, count) -> VFloat (sum /. float_of_int count)
  in
  (* Weighted path shared by List and Vector inputs. *)
  let weighted_mean ~na_rm data weight_v =
    match
      Math_utils.extract_numeric_array_with_weights ~label ~na_rm data weight_v
    with
    | Error e -> e
    | Ok (xs, ws) ->
      (match Math_utils.weighted_mean_array xs ws with
       | Some m -> VFloat m
       | None ->
         Error.value_error
           "Function `mean` expects `weights` to contain at least one positive value.")
  in
  Env.add label
    (make_builtin_named ~name:label ~variadic:true 1 (fun named_args _env ->
      match Math_common.get_bool_flag "na_rm" false named_args with
      | Error e -> e
      | Ok na_rm ->
        let args =
          Math_common.positional_args_without ["na_rm"; "weights"] named_args
        in
        let weight_arg = Math_common.optional_named_arg "weights" named_args in
        (* Chooses the weighted or unweighted computation for [data]; the
           unweighted fold is passed as a thunk so it only runs when no
           weights were supplied. *)
        let compute data unweighted =
          match weight_arg with
          | Some weight_v -> weighted_mean ~na_rm data weight_v
          | None -> finish (unweighted ())
        in
        (match args with
         | [] -> Error.arity_error_named label 1 (List.length args)
         | VList [] :: _ ->
           Error.value_error "Function `mean` called on empty List."
         | (VList items as data) :: _ ->
           compute data (fun () -> fold_items ~na_rm (0.0, 0) items)
         | VVector arr :: _ when Array.length arr = 0 ->
           Error.value_error "Function `mean` called on empty Vector."
         | (VVector arr as data) :: _ ->
           compute data (fun () -> fold_array ~na_rm arr)
         | VNA _ :: _ -> Error.na_value_error ~na_rm:true label
         | _ :: _ ->
           Error.type_error "Function `mean` expects a numeric List or Vector.")))
    env