1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
open Ast

(*
--# Covariance
--#
--# Compute sample covariance of two numeric vectors.
--#
--# @name cov
--# @param x :: Vector | List First numeric input.
--# @param y :: Vector | List Second numeric input.
--# @param na_rm :: Bool = false Pairwise remove NA values.
--# @param weights :: Vector[Float] | List[Float] = NA Optional non-negative observation weights.
--# @return :: Number | Vector Computed result (scalar or vectorized).
--# @family stats
--# @export
*)

let has_na_rm named_args =
  List.exists (fun (name, v) -> name = Some "na_rm" && match v with VBool true -> true | _ -> false) named_args

let strip_na_rm named_args =
  List.filter (fun (name, _) -> name <> Some "na_rm") named_args |> List.map snd

let numeric_values ~label ~na_rm v =
  let vals =
    match v with
    | VVector arr -> Ok (Array.to_list arr)
    | VList items -> Ok (List.map snd items)
    | VNA _ -> Error (Error.na_value_error ~na_rm:true label)
    | _ -> Error (Error.type_error (Printf.sprintf "Function `%s` expects a numeric List or Vector." label))
  in
  match vals with
  | Error e -> Error e
  | Ok vals ->
      let rec go acc = function
        | [] -> Ok (List.rev acc)
        | VInt n :: tl -> go (float_of_int n :: acc) tl
        | VFloat f :: tl -> go (f :: acc) tl
        | VNA _ :: tl when na_rm -> go acc tl
        | VNA _ :: _ -> Error (Error.na_value_error ~na_rm:true label)
        | _ -> Error (Error.type_error (Printf.sprintf "Function `%s` requires numeric values." label))
      in
      go [] vals

let quantile xs p =
  let arr = Array.of_list xs in
  let n = Array.length arr in
  if n = 0 then None
  else (
    Array.sort compare arr;
    let h = p *. float_of_int (n - 1) in
    let lo = int_of_float (Float.floor h) in
    let hi = min (lo + 1) (n - 1) in
    let frac = h -. float_of_int lo in
    Some (arr.(lo) +. frac *. (arr.(hi) -. arr.(lo))))

let mean xs =
  let n = List.length xs in
  if n = 0 then None else Some (List.fold_left ( +. ) 0.0 xs /. float_of_int n)

let vecf xs = VVector (Array.of_list (List.map (fun x -> VFloat x) xs))

let paired_numeric_values ~label ~na_rm x y =
  let to_arr = function
    | VVector arr -> Ok arr
    | VList items -> Ok (Array.of_list (List.map snd items))
    | VNA _ -> Error (Error.na_value_error ~na_rm:true label)
    | _ -> Error (Error.type_error (Printf.sprintf "Function `%s` expects two numeric Vectors or Lists." label))
  in
  match (to_arr x, to_arr y) with
  | Ok ax, Ok ay ->
      if Array.length ax <> Array.length ay then Error (Error.value_error (Printf.sprintf "Function `%s` requires vectors of equal length." label))
      else
        let rec loop i xs ys =
          if i = Array.length ax then Ok (List.rev xs, List.rev ys)
          else
            match (ax.(i), ay.(i)) with
            | VInt a, VInt b -> loop (i + 1) (float_of_int a :: xs) (float_of_int b :: ys)
            | VInt a, VFloat b -> loop (i + 1) (float_of_int a :: xs) (b :: ys)
            | VFloat a, VInt b -> loop (i + 1) (a :: xs) (float_of_int b :: ys)
            | VFloat a, VFloat b -> loop (i + 1) (a :: xs) (b :: ys)
            | (VNA _, _) | (_, VNA _) when na_rm -> loop (i + 1) xs ys
            | (VNA _, _) | (_, VNA _) -> Error (Error.na_value_error ~na_rm:true label)
            | _ -> Error (Error.type_error (Printf.sprintf "Function `%s` requires numeric values." label))
        in
        loop 0 [] []
  | Error e, _ | _, Error e -> Error e

let register env =
  Env.add "cov" (make_builtin_named ~name:"cov" ~variadic:true 2 (fun named_args _ ->
    let na_rm = has_na_rm named_args in
    let weight_arg = Math_common.optional_named_arg "weights" named_args in
    let args =
      named_args
      |> List.filter (fun (name, _) -> name <> Some "na_rm" && name <> Some "weights")
      |> List.map snd
    in
    match args with
    | [x; y] ->
        (match weight_arg with
         | Some weight_v ->
             (match Math_utils.extract_paired_numeric_arrays_with_weights ~label:"cov" ~na_rm x y weight_v with
              | Error e -> e
              | Ok (xs, ys, ws) ->
                  if Array.length xs < 2 then Error.value_error "Function `cov` requires at least 2 paired values."
                  else
                    (match Math_utils.weighted_covariance_population xs ys ws with
                     | Some v -> VFloat v
                     | None -> Error.make_error RuntimeError "Function `cov` internal error: weighted covariance could not be computed."))
         | None ->
             (match paired_numeric_values ~label:"cov" ~na_rm x y with
              | Error e -> e
              | Ok (xs, ys) ->
                  let n = List.length xs in
                  if n = 0 then VNA NAFloat
                  else if n < 2 then Error.value_error "Function `cov` requires at least 2 paired values."
                  else
                    match mean xs, mean ys with
                    | None, _ | _, None -> Error.make_error RuntimeError "Function `cov` internal error: mean returned None for non-empty list."
                    | Some mx, Some my ->
                    VFloat (List.fold_left2 (fun a xv yv -> a +. (xv -. mx) *. (yv -. my)) 0.0 xs ys /. float_of_int (n - 1))))
    | args -> Error.arity_error_named "cov" 2 (List.length args))) env