1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
(* src/packages/stats/basis.ml *)
open Ast

let to_float_array = function
  | VVector arr -> arr |> Array.to_list |> List.filter_map (function VFloat f -> Some f | VInt i -> Some (float_of_int i) | _ -> None) |> Array.of_list
  | VList items -> items |> List.map snd |> List.filter_map (function VFloat f -> Some f | VInt i -> Some (float_of_int i) | _ -> None) |> Array.of_list
  | _ -> [||]

let register env =
  (*
  --# Discretize numeric vector
  --#
  --# Splits a numeric vector into intervals.
  --#
  --# @name cut
  --# @param x :: Vector[Number] | List[Number] The vector to discretize.
  --# @param breaks :: Int | Vector[Number] Number of bins or specific cut points.
  --# @return :: Vector[String] Vector of interval labels.
  --# @example
  --#   cut([1, 2, 3, 4, 5], 2)
  --#   cut([1, 2, 3, 4, 5], [0, 2.5, 5])
  --# @family stats
  --# @export
  *)
  let env = Env.add "cut"
    (make_builtin_named ~name:"cut" ~variadic:true 2 (fun args _env ->
       let named = List.filter_map (fun (n, v) -> match n with Some name -> Some (name, v) | None -> None) args in
       let positional = List.filter_map (fun (n, v) -> match n with None -> Some v | Some _ -> None) args in
       
       let x_val = match List.assoc_opt "x" named with Some v -> Some v | None -> (match positional with v :: _ -> Some v | [] -> None) in
       let breaks_val = match List.assoc_opt "breaks" named with Some v -> Some v | None -> (match positional with _ :: v :: _ -> Some v | _ -> None) in

       match x_val, breaks_val with
       | Some x_input, Some breaks ->
           let x_floats = to_float_array x_input in
           let input_len = match x_input with VVector v -> Array.length v | VList l -> List.length l | _ -> 0 in
           if Array.length x_floats <> input_len then
             Error.type_error "Function `cut` expects a numeric vector/list without NAs."
           else
             (match breaks with
              | VInt n ->
                  if n < 1 then Error.value_error "Function `cut` requires at least 1 break."
                  else
                    let min_x = Array.fold_left min infinity x_floats in
                    let max_x = Array.fold_left max neg_infinity x_floats in
                    let range = max_x -. min_x in
                    let step = if n > 1 then range /. (float_of_int n) else if n = 1 then range else 0.0 in
                    let labels = Array.init n (fun i ->
                      let low = min_x +. (float_of_int i) *. step in
                      let high = if i = n - 1 then max_x else min_x +. (float_of_int (i+1)) *. step in
                      if n = 1 then Printf.sprintf "[%.2f, %.2f]" min_x max_x
                      else Printf.sprintf "(%.2f, %.2f]" low high
                    ) in
                    let res = Array.map (fun v ->
                      let idx = if range = 0.0 then 0 
                                else min (n - 1) (int_of_float ((v -. min_x) /. step)) in
                      VString labels.(idx)
                    ) x_floats in
                    VVector res
              | VVector _ | VList _ as b_input ->
                  let b_floats = to_float_array b_input in
                  Array.sort Float.compare b_floats;
                  if Array.length b_floats < 2 then Error.value_error "Function `cut` with vector breaks requires at least 2 values."
                  else
                    let labels = Array.init (Array.length b_floats - 1) (fun i ->
                      Printf.sprintf "(%.2f, %.2f]" b_floats.(i) b_floats.(i+1)
                    ) in
                    let res = Array.map (fun v ->
                      let rec find_bin i =
                        if i >= Array.length b_floats - 1 then (VNA NAGeneric)
                        else if (if i = 0 then v >= b_floats.(i) else v > b_floats.(i)) && v <= b_floats.(i+1) then VString labels.(i)
                        else find_bin (i + 1)
                      in
                      find_bin 0
                    ) x_floats in
                    VVector res
              | _ -> Error.type_error "Function `cut` argument `breaks` must be an Int or a numeric Vector."
             )
       | _ -> Error.type_error "Function `cut` expects a numeric Vector/List as first argument and Int/Vector/List as second."
    ))
    env
  in

  (*
  --# Polynomial basis expansion
  --#
  --# Generates a basis of polynomial terms for a numeric vector.
  --#
  --# @name poly
  --# @param x :: Vector[Number] | List[Number] The vector to expand.
  --# @param degree :: Int The degree of the polynomial.
  --# @param raw :: Bool = false If true, return raw powers instead of orthogonal polynomials.
  --# @return :: List[Vector[Float]] A named list of polynomial terms.
  --# @example
  --#   mutate(df, !!!poly($age, 3, raw = true))
  --# @family stats
  --# @export
  *)
  let env = Env.add "poly"
    (make_builtin_named ~name:"poly" ~variadic:true 2 (fun args _env ->
       let named = List.filter_map (fun (n, v) -> match n with Some name -> Some (name, v) | None -> None) args in
       let positional = List.filter_map (fun (n, v) -> match n with None -> Some v | Some _ -> None) args in
       
       let x_val = match List.assoc_opt "x" named with Some v -> Some v | None -> (match positional with v :: _ -> Some v | [] -> None) in
       let degree_val = match List.assoc_opt "degree" named with Some v -> Some v | None -> (match positional with _ :: v :: _ -> Some v | _ -> None) in
       let _raw = match List.assoc_opt "raw" named with Some (VBool b) -> b | _ -> true in (* Default to raw for now as it is easier *)

       match x_val, degree_val with
       | Some x_input, Some (VInt d) ->
           let x_floats = to_float_array x_input in
           let input_len = match x_input with VVector v -> Array.length v | VList l -> List.length l | _ -> 0 in
           if Array.length x_floats <> input_len then
             Error.type_error "Function `poly` expects a numeric vector/list without NAs."
           else
             let res_cols = ref [] in
             for j = 1 to d do
               let col = Array.map (fun v -> VFloat (v ** (float_of_int j))) x_floats in
               res_cols := (Some (Printf.sprintf "poly%d" j), VVector col) :: !res_cols
             done;
             VList (List.rev !res_cols)
       | _ -> Error.type_error "Function `poly` expects a numeric Vector/List and an integer Degree."
    )) env
  in
  env