1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
open Ast

(* Implementation of the `count` builtin.

   [named_args] is the argument list as (optional name, value) pairs.
   - The first argument must be a [VDataFrame]; any other first value is a
     type error, and an empty argument list is an arity error.
   - Each remaining unnamed argument is passed to
     [Utils.extract_column_name]; arguments for which it yields [None] are
     dropped, the rest become the grouping keys.
   - A named argument [name] holding a [VString] sets the name of the count
     column; in every other case the name is "n".
   - With no keys taken from the arguments, the DataFrame's own [group_keys]
     are used instead.

   The returned DataFrame always has [group_keys = []]. The environment
   argument is unused. *)
let count_impl (named_args : (string option * value) list) _env =
  match named_args with
  | [] -> Error.make_error ArityError "Function `count` requires a DataFrame."
  | (_, VDataFrame df) :: rest ->
      (* Only unnamed arguments are candidates for grouping columns. *)
      let positional_column = function
        | None, v -> Utils.extract_column_name v
        | Some _, _ -> None
      in
      let requested_keys = List.filter_map positional_column rest in
      let count_col =
        match List.assoc_opt (Some "name") rest with
        | Some (VString s) -> s
        | _ -> "n"
      in
      let keys =
        match requested_keys with
        | [] -> df.group_keys
        | _ :: _ -> requested_keys
      in
      (match keys with
       | [] ->
           (* No grouping at all: a single-cell table holding the row count
              of the whole DataFrame. *)
           let total = Arrow_table.num_rows df.arrow_table in
           let arrow_table =
             Arrow_bridge.table_from_value_columns
               [ (count_col, [| VInt total |]) ]
               1
           in
           VDataFrame { arrow_table; group_keys = [] }
       | _ :: _ ->
           let groups =
             Arrow_compute.group_by_optimized df.arrow_table keys
           in
           let counted = Arrow_compute.group_aggregate groups "count" "" in
           (* group_aggregate names the "count" column "n"; rename it only
              when a different name was requested. *)
           let final_table =
             if count_col = "n" then counted
             else Arrow_compute.rename_columns counted [ (count_col, "n") ]
           in
           VDataFrame { arrow_table = final_table; group_keys = [] })
  | _ :: _ ->
      Error.type_error "Function `count` expects a DataFrame as first argument."

(*
--# Count rows by group
--#
--# Counts rows in a DataFrame, optionally by selected columns or existing group keys.
--#
--# @name count
--# @family colcraft
--# @export
*)
let register env =
  (* Wrap [count_impl] as a variadic named-argument builtin called "count"
     (the positional [1] is forwarded to [make_builtin_named] unchanged),
     then bind it under the key "count" in [env]. *)
  let count_builtin =
    make_builtin_named ~name:"count" ~variadic:true 1 count_impl
  in
  Env.add "count" count_builtin env