1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
open Ast

let slice_generic ~desc (named_args : (string option * value) list) _env =
  match named_args with
  | (_, VDataFrame df) :: rest ->
      let n_limit = match List.assoc_opt (Some "n") rest with Some (VInt i) -> i | _ -> 1 in
      let order_by_val = match List.assoc_opt (Some "order_by") rest with Some v -> v | None -> 
        (match List.filter (fun (k, _) -> k = None) rest with [(_, v)] -> v | _ -> (VNA NAGeneric)) in
      
      (match Utils.extract_column_name order_by_val with
       | None -> Error.type_error "slice_max/min expects `order_by = $column`."
       | Some col_name ->
           match Arrow_table.get_column df.arrow_table col_name with
           | None -> Error.make_error KeyError (Printf.sprintf "Column `%s` not found." col_name)
           | Some col ->
               let values = Arrow_bridge.column_to_values col in
               let indexed = Array.mapi (fun i v -> (i, v)) values in
               
               let compare_v (_, v1) (_, v2) =
                 let c = match v1, v2 with
                   | VInt x, VInt y -> compare x y
                   | VFloat x, VFloat y -> compare x y
                   | VInt x, VFloat y -> compare (float_of_int x) y
                   | VFloat x, VInt y -> compare x (float_of_int y)
                   | VString x, VString y -> String.compare x y
                   | VNA _, VNA _ -> 0
                   | VNA _, _ -> -1
                   | _, VNA _ -> 1
                   | _ -> 0
                 in
                 if desc then -c else c
               in
               
               Array.sort compare_v indexed;
               
               let top_n = ref [] in
               for i = 0 to min n_limit (Array.length indexed) - 1 do
                 top_n := (fst indexed.(i)) :: !top_n
               done;
               
               let sub_table = Arrow_compute.take_rows df.arrow_table (List.rev !top_n) in
               VDataFrame { df with arrow_table = sub_table })
  | _ :: _ -> Error.type_error "Function expects a DataFrame as first argument."
  | [] -> Error.make_error ArityError "Function requires a DataFrame."

let slice_max_impl = slice_generic ~desc:true
let slice_min_impl = slice_generic ~desc:false

(*
--# Keep rows with the largest values
--#
--# Returns the rows with the highest values in an ordering column.
--#
--# @name slice_max
--# @family colcraft
--# @export
*)
(*
--# Keep rows with the smallest values
--#
--# Returns the rows with the lowest values in an ordering column.
--#
--# @name slice_min
--# @family colcraft
--# @export
*)
let register env =
  let env = Env.add "slice_max" (make_builtin_named ~name:"slice_max" ~variadic:true 1 slice_max_impl) env in
  let env = Env.add "slice_min" (make_builtin_named ~name:"slice_min" ~variadic:true 1 slice_min_impl) env in
  env