1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
open Ast

(*
--# Select columns
--#
--# Selects specific columns from a DataFrame.
--#
--# @name select
--# @param df :: DataFrame The input DataFrame.
--# @param ... :: Symbol Columns to keep, given as $column symbols (e.g., $col1, $col2); strings, lists of strings, and selection helpers are also accepted.
--# @return :: DataFrame The DataFrame with selected columns.
--# @example
--#   select(mtcars, $mpg, $wt)
--# @family colcraft
--# @seealso filter, mutate
--# @export
*)
let register env =
  (* Registers the variadic `select` builtin in [env] and returns the
     extended environment. The builtin takes a DataFrame followed by any
     number of column selectors and projects the table onto the selected
     columns, keeping only the group keys that survive the projection. *)

  (* Collects the string payload of every item of a list value, stopping at
     the first item that is not a string. [on_mismatch] is the message of the
     type error produced in that case. *)
  let strings_of_items ~on_mismatch items =
    let rec collect acc = function
      | [] -> Ok (List.rev acc)
      | (_, VString s) :: rest -> collect (s :: acc) rest
      | _ :: _ -> Error (Error.type_error on_mismatch)
    in
    collect [] items
  in
  (* Turns one selector argument into the column names it designates:
     - a symbol is resolved through Utils.extract_column_name,
     - a string names a single column,
     - a list must contain only strings,
     - a builtin is treated as a selection helper: it is called with the
       DataFrame as its only (unnamed) argument and must return a list of
       strings. *)
  let column_names_of df selector =
    match selector with
    | VSymbol _ ->
        (match Utils.extract_column_name selector with
         | Some name -> Ok [name]
         | None -> Error (Error.type_error "Function `select` invalid symbol."))
    | VString name -> Ok [name]
    | VList items ->
        strings_of_items
          ~on_mismatch:"List in `select` must contain strings." items
    | VBuiltin b ->
        (match b.b_func [(None, VDataFrame df)] (ref Env.empty) with
         | VList items ->
             strings_of_items
               ~on_mismatch:"Matcher must return list of strings." items
         | other ->
             Error (Error.type_error
                      ("Matcher returned " ^ Utils.value_to_string other)))
    | _ -> Error (Error.type_error "Function `select` expects $column syntax.")
  in
  (* Resolves all selectors left to right into one flat list of names and
     returns the first error encountered, if any. *)
  let resolve_all df selectors =
    let rec go acc = function
      | [] -> Ok (List.rev acc)
      | selector :: rest ->
          (match column_names_of df selector with
           | Ok names -> go (List.rev_append names acc) rest
           | Error _ as failure -> failure)
    in
    go [] selectors
  in
  (* Drops repeated names while keeping the position of the first occurrence,
     so overlapping selectors neither project a column twice nor report a
     missing column several times. *)
  let dedupe names =
    names
    |> List.fold_left
         (fun kept n -> if List.mem n kept then kept else n :: kept)
         []
    |> List.rev
  in
  Env.add "select"
    (make_builtin ~name:"select" ~variadic:true 1 (fun args _env ->
      match args with
      | VDataFrame df :: col_args ->
          (* NOTE(review): with no selector arguments the projection below
             runs over an empty name list, although the arity message further
             down mentions at least one $column - confirm this is intended. *)
          (match resolve_all df col_args with
           | Error err -> err
           | Ok resolved ->
               let names = dedupe resolved in
               let missing =
                 List.filter
                   (fun n -> not (Arrow_table.has_column df.arrow_table n))
                   names
               in
               (match missing with
                | _ :: _ ->
                    Error.make_error KeyError
                      (Printf.sprintf "Column(s) not found: %s."
                         (String.concat ", " missing))
                | [] ->
                    let new_table =
                      Arrow_compute.project df.arrow_table names
                    in
                    (* Group keys that were not selected no longer exist in
                       the projected table, so they are dropped. *)
                    let remaining_keys =
                      List.filter (fun k -> List.mem k names) df.group_keys
                    in
                    VDataFrame
                      { arrow_table = new_table; group_keys = remaining_keys }))
      | _ :: _ ->
          Error.type_error
            "Function `select` expects a DataFrame as first argument."
      | [] ->
          Error.make_error ArityError
            "Function `select` requires a DataFrame and at least one $column."
    ))
    env