# Derivation d0: load the gorilla image with the Python function `read_image`
# and expose the result under the name `gorilla_pixels`.
# NOTE(review): this call spells the argument `additional_file`, while the
# `rxp_r()` call for `coords` uses `additional_files` -- confirm the spelling
# against the installed rixpress version.
d0 <- rxp_py_file(
  name = gorilla_pixels,
  path = 'md_source/gorilla/gorilla-waving-cartoon-black-white-outline-clipart-914.jpg',
  read_function = "read_image",
  additional_file = "functions/functions.py"
)
rixpress translation of Yanai and Lercher 2020
This pipeline is similar to the example found in python_r
in that it mixes both R and Python code and ultimately outputs an HTML document.
This is inspired by the code found in this blog post. Thank you to Isabella Velásquez for the suggestion!
You can find the original paper here.
Source code is here. The pipeline definition is in gen-pipeline.R and the environment definition is in gen-env.R.
Let’s start by looking at the original image:
Get the coordinates using Python by defining this derivation:
This is the definition of the read_image function:
import numpy as np
from PIL import Image
def read_image(x):
    """Open the image at ``x`` and return its pixel data as a NumPy array.

    Args:
        x: Path (or other source accepted by ``PIL.Image.open``) of the image.

    Returns:
        The array produced by ``np.asarray`` on the opened image.
    """
    im = Image.open(x)
    pixels = np.asarray(im)
    return pixels
We then define a threshold and get the remaining coordinates:
# Derivation d1: a Python derivation named `threshold_level` whose expression
# is the literal 50; the `py_coords` derivation compares pixel values to it.
d1 <- rxp_py(
  name = threshold_level,
  py_expr = "50"
)
# Derivation d2: collect the indices of every pixel whose value is below
# `threshold_level`, stacked column-wise into a single array (`py_coords`).
# NOTE(review): the expression refers to `pixels`, but the image derivation
# is named `gorilla_pixels` -- confirm which name the pipeline exposes.
d2 <- rxp_py(
  name = py_coords,
  py_expr = "numpy.column_stack(numpy.where(pixels < threshold_level))"
)
We can then convert py_coords to an R object:
# Derivation d3: convert the Python object `py_coords` into an R object
# named `raw_coords`.
d3 <- rxp_py2r(
  name = raw_coords,
  expr = py_coords
)
Then we clean them:
# Derivation d4: run `clean_coords()` on `raw_coords` to produce `coords`.
# "functions.R" is passed as an additional file -- presumably it is where
# `clean_coords()` is defined; verify against the project layout.
d4 <- rxp_r(
  name = coords,
  expr = clean_coords(raw_coords),
  additional_files = "functions.R"
)
This is the R function used to clean the coordinates:
# Turn raw pixel coordinates into the synthetic bmi/steps/gender data set.
#
# `coords` is coerced with as.data.frame(); the code then uses columns `V1`
# and `V2`, so the input is assumed to be an unnamed two-column
# matrix-like object -- TODO confirm against the py2r conversion.
#
# Returns a data frame of 1768 randomly sampled rows with added columns
# `bmi`, `steps`, `randvar`, `randi` and `gender`. The result is random
# (sample_n() and rnorm()), so set a seed beforehand for reproducibility.
# Relies on `%>%` being available (e.g. dplyr or magrittr attached).
clean_coords <- function(coords) {
  as.data.frame(coords) %>%
    # sample_n() errors if fewer than 1768 rows are available.
    dplyr::sample_n(1768) %>%
    dplyr::mutate(bmi = V2 * 17 + 15, steps = 15000 - V1 * 15000 / max(V1)) %>%
    dplyr::mutate(
      # dplyr::n() is qualified like every other dplyr call in this function.
      randvar = rnorm(dplyr::n(), mean = 0, sd = 10),
      randi = steps * (1 + randvar),
      # Rows whose noisy step count falls below the median of `steps`
      # are labelled "Female", the rest "Male".
      gender = dplyr::case_when(randi < median(steps) ~ "Female", TRUE ~ "Male")
    )
}
Finally, we can look at the gender distribution:
# Read the `gender_dist` pipeline output back from its Nix store path.
rixpress::rxp_read("/nix/store/vrnzh9qaln0pv03i6qj3fkr5q3x901yj-gender_dist")
gender n
1 Female 929
2 Male 839
and some graphs:
# Read the two plot outputs (`plot1`, `plot2`) back from their Nix store paths.
rixpress::rxp_read("/nix/store/74j8q9z03llh9hg3d5d4wsmg6isbsv7m-plot1")
rixpress::rxp_read("/nix/store/s01aqbrzgsvpww4gwwwsirmsmlnc2k06-plot2")
The code to compute the gender distribution and graphs:
# Count the rows of `coords` per value of its `gender` column.
#
# `coords` must be a data frame with a `gender` column. Returns the result
# of dplyr::count(): one row per gender with the count in column `n`.
gender_distribution <- function(coords) {
  dplyr::count(coords, gender)
}
# Scatter plot of `steps` against `bmi` with all axes and decorations
# removed (theme_void()).
#
# `coords` must be a data frame with `bmi` and `steps` columns. Returns a
# ggplot object. Relies on ggplot2 and `%>%` being attached.
# NOTE(review): x is mapped to `bmi`, but the x limits are 0-15000, which is
# the range clean_coords() gives `steps` -- confirm the axis mapping.
make_plot1 <- function(coords) {
  coords %>%
    ggplot(aes(x = bmi, y = steps)) +
    geom_point() +
    theme_void() +
    xlim(0, 15000)
}
# Scatter plot of `steps` against `bmi`, coloured by `gender`, with all axes
# and decorations removed (theme_void()).
#
# `coords` must be a data frame with `bmi`, `steps` and `gender` columns.
# Returns a ggplot object. Relies on ggplot2 and `%>%` being attached.
# NOTE(review): x is mapped to `bmi`, but the x limits are 0-15000, which is
# the range clean_coords() gives `steps` -- confirm the axis mapping.
make_plot2 <- function(coords) {
  coords %>%
    ggplot(aes(x = bmi, y = steps, color = gender)) +
    geom_point() +
    theme_void() +
    xlim(0, 15000)
}