1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
(* src/packages/colcraft/factors.ml *)
open Ast

(* Convert values to optional strings: NA values become None (and are preserved
   as VNA in the output factor), non-string values are stringified.
   None entries are excluded from level derivation. *)
let as_string_list_opt values =
  let rec aux acc = function
    | [] -> List.rev acc
    | VNA _ :: t -> aux (None :: acc) t
    | VString s :: t -> aux (Some s :: acc) t
    | v :: t -> aux (Some (Utils.value_to_string v) :: acc) t
  in aux [] values

(* Scan an array for the first VFactor element and return its (levels, ordered).
   Skips VNA and any other non-Factor values, so NA-leading vectors are handled
   correctly. Returns None if no VFactor is found. *)
let find_first_factor_in_array arr =
  let n = Array.length arr in
  let rec aux i =
    if i >= n then None
    else match arr.(i) with
      | VFactor (_, levels, ordered) -> Some (levels, ordered)
      | _ -> aux (i + 1)
  in aux 0

(* Return the index of string [s] in the level list [levels], or None if absent.
   Used by replace_na and complete when accepting a VString fill value for a
   DictionaryColumn. *)
let level_index_of levels s =
  let rec aux i = function
    | [] -> None
    | h :: _ when h = s -> Some i
    | _ :: t -> aux (i + 1) t
  in aux 0 levels

let factor_generic ~fct_mode (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  match positional with
  | [] -> Error.make_error Ast.ArityError "factor expects at least 1 argument"
  | x_val :: _ ->
      let items = match x_val with
        | VVector a -> Array.to_list a
        | VList l -> List.map snd l
        | _ -> [x_val]
      in
      let string_opts = as_string_list_opt items in

      (* Only non-NA values contribute to level derivation *)
      let non_na_strings = List.filter_map Fun.id string_opts in
      let unique_levels = 
        if fct_mode then
          (* for fct(), levels follow first appearance *)
          let rec first_appearance acc = function
            | [] -> List.rev acc
            | h :: t -> if List.mem h acc then first_appearance acc t else first_appearance (h :: acc) t
          in first_appearance [] non_na_strings
        else
          List.sort_uniq String.compare non_na_strings
      in

      let levels =
        match List.assoc_opt "levels" named with
        | Some (VVector a) -> List.filter_map Fun.id (as_string_list_opt (Array.to_list a))
        | Some (VList l) -> List.filter_map Fun.id (as_string_list_opt (List.map snd l))
        | _ -> unique_levels
      in

      let ordered =
        match List.assoc_opt "ordered" named with
        | Some (VBool b) -> b
        | _ -> false
      in

      (* Pre-compute level -> index table for O(1) lookup per element *)
      let level_tbl = Hashtbl.create (List.length levels) in
      List.iteri (fun i l -> Hashtbl.add level_tbl l i) levels;

      let factor_arr = Array.of_list string_opts |> Array.map (function
        | None -> VNA Ast.NAGeneric
        | Some s ->
            (match Hashtbl.find_opt level_tbl s with
             | Some idx -> VFactor (idx, levels, ordered)
             | None -> VNA Ast.NAGeneric (* NA if value is not in levels *))
      ) in
      VVector factor_arr

let factor_impl = factor_generic ~fct_mode:false
let fct_impl = factor_generic ~fct_mode:true
let ordered_impl (args : (string option * value) list) _env =
  (* same as factor but defaults ordered=true *)
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  let has_ordered = List.mem_assoc "ordered" named in
  if has_ordered then factor_impl args _env
  else factor_impl ((Some "ordered", VBool true) :: args) _env

let as_factor_impl args env =
  factor_impl args env

let fct_infreq_impl args _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  match positional with
  | [VVector arr] ->
      let factors = Array.to_list arr in
      let c, levels, ordered =
        match find_first_factor_in_array arr with
        | Some (l, o) -> true, l, o
        | None -> false, [], false
      in
      if not c then VVector arr
      else
        let counts = Hashtbl.create (List.length levels) in
        List.iter (fun v -> match v with
          | VFactor(i, _, _) ->
              let prev = match Hashtbl.find_opt counts i with Some c -> c | None -> 0 in
              Hashtbl.replace counts i (prev + 1)
          | _ -> ()
        ) factors;

        let new_levels = levels
          |> List.mapi (fun i l -> (i, l, match Hashtbl.find_opt counts i with Some c -> c | None -> 0))
          |> List.sort (fun (_, _, c1) (_, _, c2) -> compare c2 c1)
          |> List.map (fun (_, l, _) -> l)
        in

        (* Pre-compute new level -> index table for O(1) remapping *)
        let new_level_tbl = Hashtbl.create (List.length new_levels) in
        List.iteri (fun i l -> Hashtbl.add new_level_tbl l i) new_levels;

        let factor_arr = Array.map (fun v -> match v with
          | VFactor (i, _, _) ->
              (match List.nth_opt levels i with
               | Some s ->
                   let new_idx = match Hashtbl.find_opt new_level_tbl s with
                     | Some idx -> idx
                     | None -> 0
                   in
                   VFactor (new_idx, new_levels, ordered)
               | None ->
                   (* Out-of-range factor index; treat as NA to avoid crashing *)
                   VNA Ast.NAGeneric)
          | _ -> v
        ) arr in
        VVector factor_arr
  | _ -> Error.make_error Ast.ArityError "fct_infreq expects 1 argument (vector of factors)"

let levels_impl args _env =
  match args with
  | [VVector arr] ->
      if Array.length arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array arr with
         | Some (levels, _) -> VVector (Array.of_list (List.map (fun s -> VString s) levels))
         | None -> VVector [||])
  | _ -> Error.make_error Ast.ArityError "levels expects 1 argument"

let fct_rev_impl args _env =
  match args with
  | [VVector arr] ->
      if Array.length arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array arr with
         | Some (levels, ordered) ->
             let new_levels = List.rev levels in
             let n = List.length levels in
             let factor_arr = Array.map (function
               | VFactor (i, _, _) -> VFactor (n - 1 - i, new_levels, ordered)
               | v -> v
             ) arr in
             VVector factor_arr
         | None -> VVector arr)
  | _ -> Error.make_error Ast.ArityError "fct_rev expects 1 argument"

let fct_recode_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  match positional with
  | [VVector arr] ->
      if Array.length arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array arr with
         | Some (levels, ordered) ->
            (* named: new_level = old_level *)
            let recode_map = List.filter_map (fun (new_l, old_v) ->
              match old_v with
              | VString old_l -> Some (old_l, new_l)
              | _ -> None
            ) named in
            
            let new_levels_with_dups = List.map (fun l ->
              match List.assoc_opt l recode_map with
              | Some new_l -> new_l
              | None -> l
            ) levels in
            
            (* Deduplicate levels if multiple old levels mapped to same new level *)
            let final_levels = 
              let rec unique acc = function
                | [] -> List.rev acc
                | h :: t -> if List.mem h acc then unique acc t else unique (h :: acc) t
              in unique [] new_levels_with_dups
            in
            
            let level_remapping = List.mapi (fun _i old_l ->
              let new_l = match List.assoc_opt old_l recode_map with Some nl -> nl | None -> old_l in
              match List.find_index (fun l -> l = new_l) final_levels with
              | Some idx -> idx
              | None -> 0
            ) levels |> Array.of_list in
            
            let factor_arr = Array.map (function
              | VFactor (i, _, _) ->
                  let new_idx = level_remapping.(i) in
                  VFactor (new_idx, final_levels, ordered)
              | v -> v
            ) arr in
            VVector factor_arr
         | None -> VVector arr)
  | _ -> Error.make_error Ast.ArityError "fct_recode expects at least 1 argument"

let fct_reorder_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  match positional with
  | [VVector f_arr; VVector x_arr] ->
      if Array.length f_arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array f_arr with
         | Some (levels, ordered) ->
             let n = Array.length f_arr in
             let desc = match List.assoc_opt ".desc" named with Some (VBool b) -> b | _ -> false in
             
             (* Group .x values by factor levels *)
             let level_data = Array.init (List.length levels) (fun _ -> []) in
             for i = 0 to n - 1 do
               match f_arr.(i) with
               | VFactor (idx, _, _) -> 
                   (match x_arr.(i) with
                    | VInt x -> level_data.(idx) <- float_of_int x :: level_data.(idx)
                    | VFloat x -> level_data.(idx) <- x :: level_data.(idx)
                    | _ -> ())
               | _ -> ()
             done;
             
             (* Calculate summary (median by default) *)
             let summaries = Array.mapi (fun i data ->
               if data = [] then (i, neg_infinity)
               else
                 let sorted = List.sort Float.compare data in
                 let len = List.length sorted in
                 let median = 
                   if len mod 2 = 1 then List.nth sorted (len / 2)
                   else (List.nth sorted (len / 2 - 1) +. List.nth sorted (len / 2)) /. 2.
                 in
                 (i, median)
             ) level_data in
             
             let sorted_summaries = Array.to_list summaries |> List.sort (fun (_, s1) (_, s2) ->
               if desc then Float.compare s2 s1 else Float.compare s1 s2
             ) in
             
             let new_level_order = List.map fst sorted_summaries in
             let new_levels = List.map (fun i -> List.nth levels i) new_level_order in
             let remapping = Array.make (List.length levels) 0 in
             List.iteri (fun i old_idx -> remapping.(old_idx) <- i) new_level_order;
             
             let factor_arr = Array.map (function
               | VFactor (i, _, _) -> VFactor (remapping.(i), new_levels, ordered)
               | v -> v
             ) f_arr in
             VVector factor_arr
         | None -> VVector f_arr)
  | _ -> Error.make_error Ast.ArityError "fct_reorder expects at least 2 arguments (.f and .x)"

let fct_lump_n_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  match positional with
  | [VVector arr] ->
      if Array.length arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array arr with
         | Some (levels, ordered) ->
             let n_limit = match List.assoc_opt "n" named with Some (VInt n) -> n | _ -> 10 in
             let other_level = match List.assoc_opt "other_level" named with Some (VString s) -> s | _ -> "Other" in
             
             let counts = Hashtbl.create (List.length levels) in
             Array.iter (function
               | VFactor (i, _, _) -> 
                   let prev = match Hashtbl.find_opt counts i with Some c -> c | None -> 0 in
                   Hashtbl.replace counts i (prev + 1)
               | _ -> ()
             ) arr;
             
             let sorted_counts = List.mapi (fun i l -> (i, l, match Hashtbl.find_opt counts i with Some c -> c | None -> 0)) levels
                                |> List.sort (fun (_, _, c1) (_, _, c2) -> compare c2 c1) in
             
             let top_n = List.filteri (fun i _ -> i < n_limit) sorted_counts in
             let top_indices = List.map (fun (i, _, _) -> i) top_n in
             
             let new_levels = (List.map (fun (_, l, _) -> l) top_n) @ [other_level] in
             let other_idx = List.length new_levels - 1 in
             
             let remapping = Array.make (List.length levels) other_idx in
             List.iteri (fun i old_idx -> remapping.(old_idx) <- i) top_indices;
             
             let factor_arr = Array.map (function
               | VFactor (i, _, _) -> VFactor (remapping.(i), new_levels, ordered)
               | v -> v
             ) arr in
             VVector factor_arr
         | None -> VVector arr)
  | _ -> Error.make_error Ast.ArityError "fct_lump_n expects 1 argument"

let fct_relevel_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  match positional with
  | VVector f_arr :: levels_to_move ->
      if Array.length f_arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array f_arr with
         | Some (levels, ordered) ->
             let to_move = List.filter_map (function VString s -> Some s | _ -> None) levels_to_move in
             let after = match List.assoc_opt "after" named with Some (VInt i) -> i | _ -> 0 in
             
             let stable_other = List.filter (fun l -> not (List.mem l to_move)) levels in
             let new_levels = 
               if after = 0 then to_move @ stable_other
               else if after >= List.length stable_other then stable_other @ to_move
               else 
                 let rec insert i acc = function
                   | [] -> List.rev acc
                   | h :: t -> if i = after then List.rev (to_move @ (h :: acc)) @ t else insert (i + 1) (h :: acc) t
                 in insert 1 [] stable_other
             in
             
             let remapping = Array.make (List.length levels) 0 in
             List.iteri (fun i old_l ->
               match List.find_index (fun l -> l = old_l) new_levels with
               | Some idx -> remapping.(i) <- idx
               | None -> ()
             ) levels;
             
             let factor_arr = Array.map (function
               | VFactor (i, _, _) -> VFactor (remapping.(i), new_levels, ordered)
               | v -> v
             ) f_arr in
             VVector factor_arr
         | None -> VVector f_arr)
  | _ -> Error.make_error Ast.ArityError "fct_relevel expects at least 1 argument"

let fct_collapse_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  match positional with
  | [VVector arr] ->
      if Array.length arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array arr with
         | Some (levels, ordered) ->
             (* named: new_level = [old_level1, old_level2] *)
             let collapse_map = List.filter_map (fun (new_l, old_vs) ->
               match old_vs with
               | VVector a -> Some (Array.to_list a |> List.filter_map (function VString s -> Some s | _ -> None), new_l)
               | VList l -> Some (List.map snd l |> List.filter_map (function VString s -> Some s | _ -> None), new_l)
               | VString s -> Some ([s], new_l)
               | _ -> None
             ) named in
             
             let new_levels_with_dups = List.map (fun l ->
               match List.find (fun (olds, _) -> List.mem l olds) collapse_map with
               | exception Not_found -> l
               | (_, new_l) -> new_l
             ) levels in
             
             let final_levels = 
                let rec unique acc = function
                  | [] -> List.rev acc
                  | h :: t -> if List.mem h acc then unique acc t else unique (h :: acc) t
                in unique [] new_levels_with_dups
             in
             
             let level_remapping = List.mapi (fun _i old_l ->
               let new_l = match List.find (fun (olds, _) -> List.mem old_l olds) collapse_map with
                 | exception Not_found -> old_l
                 | (_, nl) -> nl
               in
               match List.find_index (fun l -> l = new_l) final_levels with
               | Some idx -> idx
               | None -> 0
             ) levels |> Array.of_list in
             
             let factor_arr = Array.map (function
               | VFactor (i, _, _) -> VFactor (level_remapping.(i), final_levels, ordered)
               | v -> v
             ) arr in
             VVector factor_arr
         | None -> VVector arr)
  | _ -> Error.make_error Ast.ArityError "fct_collapse expects at least 1 argument"

let count_levels levels arr =
  let counts = Array.make (List.length levels) 0 in
  Array.iter (function
    | VFactor (i, _, _) when i >= 0 && i < Array.length counts ->
        counts.(i) <- counts.(i) + 1
    | _ -> ()
  ) arr;
  counts

let remap_factor_array arr _levels ordered new_levels remapping =
  let factor_arr = Array.map (function
    | VFactor (i, _, _) when i >= 0 && i < Array.length remapping ->
        let new_idx = remapping.(i) in
        if new_idx < 0 then VNA Ast.NAGeneric else VFactor (new_idx, new_levels, ordered)
    | VFactor _ -> VNA Ast.NAGeneric
    | value -> value
  ) arr in
  VVector factor_arr

let append_unique_levels base candidates =
  let seen = Hashtbl.create (List.length base + List.length candidates) in
  List.iter (fun level -> Hashtbl.replace seen level ()) base;
  let extras_rev = ref [] in
  List.iter (fun level ->
    if not (Hashtbl.mem seen level) then begin
      Hashtbl.replace seen level ();
      extras_rev := level :: !extras_rev
    end
  ) candidates;
  base @ List.rev !extras_rev

let string_values_of function_name value =
  match value with
  | VString s -> Ok [s]
  | VVector arr ->
      Array.fold_right (fun item acc ->
        match item, acc with
        | VString s, Ok values -> Ok (s :: values)
        | _, Ok _ ->
            Error
              (Error.type_error
                 (Printf.sprintf "Function `%s` expects string level names." function_name))
        | _, Error err -> Error err
      ) arr (Ok [])
  | VList items ->
      List.fold_right (fun (_, item) acc ->
        match item, acc with
        | VString s, Ok values -> Ok (s :: values)
        | _, Ok _ ->
            Error
              (Error.type_error
                 (Printf.sprintf "Function `%s` expects string level names." function_name))
        | _, Error err -> Error err
      ) items (Ok [])
  | _ ->
      Error
        (Error.type_error
           (Printf.sprintf "Function `%s` expects string level names." function_name))

let fct_lump_min_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  match positional with
  | [VVector arr; VInt min_count] ->
      if Array.length arr = 0 then VVector [||]
      else
        (match find_first_factor_in_array arr with
         | Some (levels, ordered) ->
             let other_level =
               match List.assoc_opt "other_level" named with
               | Some (VString s) -> s
               | _ -> "Other"
             in
             let counts = count_levels levels arr in
             let kept_levels =
               List.mapi (fun idx level -> (idx, level)) levels
               |> List.filter (fun (idx, _) -> counts.(idx) >= min_count)
             in
              if List.length kept_levels = List.length levels then
                VVector arr
              else
                let new_levels = List.rev (other_level :: List.rev (List.map snd kept_levels)) in
                let other_idx = List.length new_levels - 1 in
                let remapping = Array.make (List.length levels) other_idx in
                List.iteri (fun new_idx (old_idx, _) -> remapping.(old_idx) <- new_idx) kept_levels;
                remap_factor_array arr levels ordered new_levels remapping
         | None -> VVector arr)
  | _ -> Error.make_error Ast.ArityError "fct_lump_min expects a factor vector and minimum count"

let fct_lump_prop_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  let prop_of_value = function
    | VFloat f -> Some f
    | VInt i -> Some (float_of_int i)
    | _ -> None
  in
  match positional with
  | [VVector arr; prop_value] ->
      (match prop_of_value prop_value with
       | None ->
           Error.type_error "Function `fct_lump_prop` expects a numeric proportion."
       | Some prop ->
           if prop < 0.0 || prop > 1.0 then
             Error.value_error "Function `fct_lump_prop` proportion must be between 0 and 1."
           else if Array.length arr = 0 then
             VVector [||]
           else
             (match find_first_factor_in_array arr with
              | Some (levels, ordered) ->
                  let other_level =
                    match List.assoc_opt "other_level" named with
                    | Some (VString s) -> s
                    | _ -> "Other"
                  in
                  let counts = count_levels levels arr in
                  let total =
                    Array.fold_left ( + ) 0 counts |> float_of_int
                  in
                  let kept_levels =
                    List.mapi (fun idx level -> (idx, level)) levels
                    |> List.filter (fun (idx, _) ->
                         total > 0.0 && (float_of_int counts.(idx) /. total) >= prop)
                  in
                   if List.length kept_levels = List.length levels then
                     VVector arr
                   else
                    let new_levels = List.rev (other_level :: List.rev (List.map snd kept_levels)) in
                    let other_idx = List.length new_levels - 1 in
                    let remapping = Array.make (List.length levels) other_idx in
                    List.iteri (fun new_idx (old_idx, _) -> remapping.(old_idx) <- new_idx) kept_levels;
                    remap_factor_array arr levels ordered new_levels remapping
              | None -> VVector arr))
  | _ -> Error.make_error Ast.ArityError "fct_lump_prop expects a factor vector and proportion"

let fct_other_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let named = List.filter_map (fun (k, v) -> match k with Some n -> Some (n, v) | None -> None) args in
  let extract_names field =
    match List.assoc_opt field named with
    | Some value ->
        string_values_of "fct_other" value
    | None -> Ok []
  in
  match positional with
  | [VVector arr] ->
      (match find_first_factor_in_array arr, extract_names "keep", extract_names "drop" with
       | Some (levels, ordered), Ok keep, Ok drop ->
           if keep <> [] && drop <> [] then
             Error.value_error "Function `fct_other` accepts either `keep` or `drop`, not both."
           else
             let other_level =
               match List.assoc_opt "other_level" named with
               | Some (VString s) -> s
               | _ -> "Other"
             in
             let keep_levels =
               if keep <> [] then keep else List.filter (fun level -> not (List.mem level drop)) levels
             in
             let kept_present = List.filter (fun level -> List.mem level keep_levels) levels in
             let new_levels =
               if List.length kept_present = List.length levels then kept_present
               else kept_present @ [other_level]
             in
             let other_idx = List.length new_levels - 1 in
             let remapping = Array.make (List.length levels) other_idx in
             List.iteri (fun old_idx level ->
               match List.find_index (( = ) level) kept_present with
               | Some new_idx -> remapping.(old_idx) <- new_idx
               | None -> ()
             ) levels;
             remap_factor_array arr levels ordered new_levels remapping
       | None, _, _ -> VVector arr
       | _, Error err, _ | _, _, Error err -> err)
  | _ -> Error.make_error Ast.ArityError "fct_other expects a factor vector"

let fct_drop_impl args _env =
  match args with
  | [VVector arr] ->
      (match find_first_factor_in_array arr with
       | Some (levels, ordered) ->
           let used = Array.make (List.length levels) false in
           Array.iter (function
             | VFactor (i, _, _) when i >= 0 && i < Array.length used -> used.(i) <- true
             | _ -> ()
           ) arr;
           let kept_levels =
             List.mapi (fun idx level -> (idx, level)) levels
             |> List.filter (fun (idx, _) -> used.(idx))
           in
           let new_levels = List.map snd kept_levels in
           let remapping = Array.make (List.length levels) (-1) in
           List.iteri (fun new_idx (old_idx, _) -> remapping.(old_idx) <- new_idx) kept_levels;
           remap_factor_array arr levels ordered new_levels remapping
       | None -> VVector arr)
  | _ -> Error.make_error Ast.ArityError "fct_drop expects 1 argument"

let fct_expand_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let candidate_levels value =
    match value with
    | VString s -> [s]
    | VVector arr ->
        Array.fold_left (fun acc item ->
          match item with
          | VString s -> s :: acc
          | _ -> acc
        ) [] arr
        |> List.rev
    | VList items ->
        List.fold_left (fun acc (_, item) ->
          match item with
          | VString s -> s :: acc
          | _ -> acc
        ) [] items
        |> List.rev
    | _ -> []
  in
  match positional with
  | VVector arr :: extra_levels ->
      (match find_first_factor_in_array arr with
       | Some (levels, ordered) ->
           let new_levels =
             List.fold_left (fun acc value ->
               append_unique_levels acc (candidate_levels value)
             ) levels extra_levels
           in
           let remapping = Array.init (List.length levels) Fun.id in
           remap_factor_array arr levels ordered new_levels remapping
       | None -> VVector arr)
  | _ -> Error.make_error Ast.ArityError "fct_expand expects a factor vector and optional levels"

let fct_c_impl (args : (string option * value) list) _env =
  let positional = List.filter_map (fun (k, v) -> if k = None then Some v else None) args in
  let extract_values value =
    match value with
    | VVector arr -> Array.to_list arr
    | VList items -> List.map snd items
    | scalar -> [scalar]
  in
  let labels_of_values values =
    List.map (function
      | VFactor (idx, levels, _) -> List.nth_opt levels idx
      | VString s -> Some s
      | VNA _ -> None
      | other -> Some (Utils.value_to_string other)
    ) values
  in
  match positional with
  | [] -> Error.make_error Ast.ArityError "fct_c expects at least one factor vector"
  | values ->
      let concatenated = List.concat_map extract_values values in
      let ordered =
        List.for_all (function
          | VVector arr ->
              (match find_first_factor_in_array arr with
               | Some (_, is_ordered) -> is_ordered
               | None -> true)
          | _ -> true) values
      in
      let unified_levels =
        List.fold_left (fun acc value ->
          let levels_to_add =
            match value with
            | VVector arr ->
                (match find_first_factor_in_array arr with
                 | Some (levels, _) -> levels
                 | None ->
                     labels_of_values (Array.to_list arr) |> List.filter_map Fun.id)
            | VList items ->
                labels_of_values (List.map snd items) |> List.filter_map Fun.id
            | scalar ->
                labels_of_values [scalar] |> List.filter_map Fun.id
          in
          append_unique_levels acc levels_to_add
        ) [] values
      in
      let remap = Hashtbl.create (List.length unified_levels) in
      List.iteri (fun idx level -> Hashtbl.replace remap level idx) unified_levels;
      let factor_values =
        labels_of_values concatenated
        |> List.map (function
             | None -> VNA Ast.NAGeneric
             | Some label ->
                 (match Hashtbl.find_opt remap label with
                  | Some idx -> VFactor (idx, unified_levels, ordered)
                  | None -> VNA Ast.NAGeneric))
      in
      VVector (Array.of_list factor_values)

(*
--# Create factor values
--#
--# Converts values to factor-encoded vectors with derived or explicit levels.
--#
--# @name factor
--# @family colcraft
--# @export
*)
(*
--# Coerce values to factors
--#
--# Alias for factor() that converts values to factor-encoded vectors.
--#
--# @name as_factor
--# @family colcraft
--# @export
*)
(*
--# Order factor levels by frequency
--#
--# Reorders factor levels so that more frequent levels appear first.
--#
--# @name fct_infreq
--# @family colcraft
--# @export
*)
(*
--# Get factor levels
--#
--# Returns the level labels stored on a factor vector.
--#
--# @name levels
--# @family colcraft
--# @export
*)
(*
--# Reverse factor levels
--#
--# Reverses the order of the levels in a factor vector.
--#
--# @name fct_rev
--# @family colcraft
--# @export
*)
(*
--# Rename factor levels
--#
--# Recodes existing factor levels using named replacements.
--#
--# @name fct_recode
--# @family colcraft
--# @export
*)
(*
--# Order factor levels by another vector
--#
--# Reorders factor levels using summary statistics computed from a companion numeric vector.
--#
--# @name fct_reorder
--# @family colcraft
--# @export
*)
(*
--# Keep the most frequent factor levels
--#
--# Collapses infrequent factor levels into an other bucket while keeping the most frequent levels.
--#
--# @name fct_lump_n
--# @family colcraft
--# @export
*)
(*
--# Lump factor levels below a minimum count
--#
--# Collapses factor levels whose counts fall below a minimum threshold.
--#
--# @name fct_lump_min
--# @family colcraft
--# @export
*)
(*
--# Lump factor levels below a minimum proportion
--#
--# Collapses factor levels whose frequency falls below a proportion threshold.
--#
--# @name fct_lump_prop
--# @family colcraft
--# @export
*)
(*
--# Create factors in first-seen order
--#
--# Creates a factor whose levels follow the first appearance order of the input values.
--#
--# @name fct
--# @family colcraft
--# @export
*)
(*
--# Move selected levels to the front
--#
--# Explicitly reorders a factor by moving named levels ahead of the remaining levels.
--#
--# @name fct_relevel
--# @family colcraft
--# @export
*)
(*
--# Collapse multiple levels
--#
--# Merges several existing factor levels into new grouped levels.
--#
--# @name fct_collapse
--# @family colcraft
--# @export
*)
(*
--# Replace unlisted levels with Other
--#
--# Keeps selected factor levels and maps the rest to an other bucket.
--#
--# @name fct_other
--# @family colcraft
--# @export
*)
(*
--# Drop unused factor levels
--#
--# Removes levels that are not referenced by any value in the factor vector.
--#
--# @name fct_drop
--# @family colcraft
--# @export
*)
(*
--# Add explicit factor levels
--#
--# Adds extra levels to a factor without changing existing assignments.
--#
--# @name fct_expand
--# @family colcraft
--# @export
*)
(*
--# Concatenate factor vectors
--#
--# Combines multiple factor vectors while reconciling their levels.
--#
--# @name fct_c
--# @family colcraft
--# @export
*)
(*
--# Create ordered factors
--#
--# Creates factor vectors marked as ordered for ordinal comparisons.
--#
--# @name ordered
--# @family colcraft
--# @export
*)
let register env =
  let env = Env.add "factor" (make_builtin_named ~name:"factor" ~variadic:true 1 factor_impl) env in
  let env = Env.add "as_factor" (make_builtin_named ~name:"as_factor" ~variadic:true 1 as_factor_impl) env in
  let env = Env.add "fct_infreq" (make_builtin_named ~name:"fct_infreq" ~variadic:true 1 fct_infreq_impl) env in
  let env = Env.add "levels" (make_builtin ~name:"levels" 1 levels_impl) env in
  let env = Env.add "fct_rev" (make_builtin ~name:"fct_rev" 1 fct_rev_impl) env in
  let env = Env.add "fct_recode" (make_builtin_named ~name:"fct_recode" ~variadic:true 1 fct_recode_impl) env in
  let env = Env.add "fct_reorder" (make_builtin_named ~name:"fct_reorder" ~variadic:true 2 fct_reorder_impl) env in
  let env = Env.add "fct_lump_n" (make_builtin_named ~name:"fct_lump_n" ~variadic:true 1 fct_lump_n_impl) env in
  let env = Env.add "fct_lump_min" (make_builtin_named ~name:"fct_lump_min" ~variadic:true 2 fct_lump_min_impl) env in
  let env = Env.add "fct_lump_prop" (make_builtin_named ~name:"fct_lump_prop" ~variadic:true 2 fct_lump_prop_impl) env in
  let env = Env.add "fct" (make_builtin_named ~name:"fct" ~variadic:true 1 fct_impl) env in
  let env = Env.add "fct_relevel" (make_builtin_named ~name:"fct_relevel" ~variadic:true 1 fct_relevel_impl) env in
  let env = Env.add "fct_collapse" (make_builtin_named ~name:"fct_collapse" ~variadic:true 1 fct_collapse_impl) env in
  let env = Env.add "fct_other" (make_builtin_named ~name:"fct_other" ~variadic:true 1 fct_other_impl) env in
  let env = Env.add "fct_drop" (make_builtin ~name:"fct_drop" 1 fct_drop_impl) env in
  let env = Env.add "fct_expand" (make_builtin_named ~name:"fct_expand" ~variadic:true 1 fct_expand_impl) env in
  let env = Env.add "fct_c" (make_builtin_named ~name:"fct_c" ~variadic:true 1 fct_c_impl) env in
  let env = Env.add "ordered" (make_builtin_named ~name:"ordered" ~variadic:true 1 ordered_impl) env in
  env