This workflow runs in chunks of the `*companies` data and caches
intermediate results. This saves memory, completes faster, and
allows you to resume after interruptions.
Setup
library(dplyr, warn.conflicts = FALSE)
library(readr, warn.conflicts = FALSE)
library(rappdirs)
library(future)
library(fs)
# Masking `tiltIndicatorAfter::profile*()` to use `chunks`
library(tiltWorkflows)
#> Loading required package: tiltIndicatorAfter
#> Loading required package: tiltToyData
#>
#> Attaching package: 'tiltWorkflows'
#> The following objects are masked from 'package:tiltIndicatorAfter':
#>
#> profile_emissions, profile_emissions_upstream, profile_sector,
#> profile_sector_upstream
If the parameter `chunks` is `NULL` (default), your `*companies` dataset is
automatically chunked to distribute its companies across the available cores.
This uses your computer's resources efficiently, but the automatic choice may
not be the best one for your data. Consider adjusting the `chunks` parameter
manually. Aim to balance memory usage and speed. A small number of chunks
makes each chunk bigger and may overwhelm your memory. A large number of
chunks may take longer because of the overhead of caching each chunk.
Parameters
params
#> $chunks
#> [1] ""
#>
#> $order
#> [1] "sample"
#>
#> $cache_dir
#> [1] ""
#>
#> $input
#> [1] "input"
#>
#> $output
#> [1] "output"
#>
#> $europages_companies
#> [1] "europages_companies.csv"
#>
#> $ecoinvent_activities
#> [1] "ecoinvent_activities.csv"
#>
#> $ecoinvent_europages
#> [1] "ecoinvent_europages.csv"
#>
#> $isic
#> [1] "isic.csv"
#>
#> $sector_profile_companies
#> [1] "sector_profile_companies.csv"
#>
#> $sector_profile_any_scenarios
#> [1] "sector_profile_any_scenarios.csv"
# Session-wide settings. The `tiltWorkflows.*` options are taken verbatim from
# the document parameters (`params`).
options(
  # Wider console so printed tables are not wrapped
  width = 500,
  # Keep readr from printing column specifications
  readr.show_col_types = FALSE,
  # How many chunks to split the data into
  tiltWorkflows.chunks = params$chunks,
  # The order in which the chunks run
  tiltWorkflows.order = params$order,
  # The directory that stores the cache
  tiltWorkflows.cache_dir = params$cache_dir
)

# Run over multiple workers in parallel
plan(multisession)

# Make sure the input/ and output/ directories exist
if (!dir_exists(params$input)) {
  use_toy_input()
}
if (!dir_exists(params$output)) {
  dir_create(params$output)
}
Session information
getwd()
#> [1] "/home/runner/work/tiltWorkflows/tiltWorkflows/vignettes/articles"
availableCores()
#> system
#> 4
dir_tree(params$input)
#> input
#> ├── ecoinvent_activities.csv
#> ├── ecoinvent_europages.csv
#> ├── ecoinvent_inputs.csv
#> ├── emissions_profile_any_companies.csv
#> ├── emissions_profile_products.csv
#> ├── emissions_profile_upstream_products.csv
#> ├── europages_companies.csv
#> ├── isic.csv
#> ├── sector_profile_any_scenarios.csv
#> ├── sector_profile_companies.csv
#> ├── sector_profile_upstream_companies.csv
#> └── sector_profile_upstream_products.csv
dir_tree(params$output)
#> output
#> ├── emissions_profile_at_company_level.csv
#> ├── emissions_profile_at_product_level.csv
#> ├── emissions_profile_upstream_at_company_level.csv
#> ├── emissions_profile_upstream_at_product_level.csv
#> ├── sector_profile_upstream_at_company_level.csv
#> └── sector_profile_upstream_at_product_level.csv
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.4.2 (2024-10-31)
#> os Ubuntu 24.04.1 LTS
#> system x86_64, linux-gnu
#> ui X11
#> language en
#> collate C.UTF-8
#> ctype C.UTF-8
#> tz UTC
#> date 2025-02-19
#> pandoc 3.1.11 @ /opt/hostedtoolcache/pandoc/3.1.11/x64/ (via rmarkdown)
#> quarto NA
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> bslib 0.9.0 2025-01-30 [1] RSPM
#> cachem 1.1.0 2024-05-16 [1] RSPM
#> cli 3.6.4 2025-02-13 [1] RSPM
#> codetools 0.2-20 2024-03-31 [3] CRAN (R 4.4.2)
#> crayon 1.5.3 2024-06-20 [1] RSPM
#> dchunkr 0.0.0.9001 2025-02-19 [1] Github (maurolepore/dchunkr@9748350)
#> desc 1.4.3 2023-12-10 [1] RSPM
#> digest 0.6.37 2024-08-19 [1] RSPM
#> dplyr * 1.1.4 2023-11-17 [1] RSPM
#> evaluate 1.0.3 2025-01-10 [1] RSPM
#> fastmap 1.2.0 2024-05-15 [1] RSPM
#> fs * 1.6.5 2024-10-30 [1] RSPM
#> furrr 0.3.1 2022-08-15 [1] RSPM
#> future * 1.34.0 2024-07-29 [1] RSPM
#> generics 0.1.3 2022-07-05 [1] RSPM
#> globals 0.16.3 2024-03-08 [1] RSPM
#> glue 1.8.0 2024-09-30 [1] RSPM
#> hms 1.1.3 2023-03-21 [1] RSPM
#> htmltools 0.5.8.1 2024-04-04 [1] RSPM
#> htmlwidgets 1.6.4 2023-12-06 [1] RSPM
#> httpuv 1.6.15 2024-03-26 [1] RSPM
#> jquerylib 0.1.4 2021-04-26 [1] RSPM
#> jsonlite 1.8.9 2024-09-20 [1] RSPM
#> knitr 1.49 2024-11-08 [1] RSPM
#> later 1.4.1 2024-11-27 [1] RSPM
#> lifecycle 1.0.4 2023-11-07 [1] RSPM
#> listenv 0.9.1 2024-01-29 [1] RSPM
#> magrittr 2.0.3 2022-03-30 [1] RSPM
#> memoise 2.0.1 2021-11-26 [1] RSPM
#> mime 0.12 2021-09-28 [1] RSPM
#> parallelly 1.42.0 2025-01-30 [1] RSPM
#> pillar 1.10.1 2025-01-07 [1] RSPM
#> pkgconfig 2.0.3 2019-09-22 [1] RSPM
#> pkgdown 2.1.1 2024-09-17 [1] RSPM
#> promises 1.3.2 2024-11-28 [1] RSPM
#> purrr 1.0.4 2025-02-05 [1] RSPM
#> R6 2.6.1 2025-02-15 [1] RSPM
#> ragg 1.3.3 2024-09-11 [1] RSPM
#> rappdirs * 0.3.3 2021-01-31 [1] RSPM
#> Rcpp 1.0.14 2025-01-12 [1] RSPM
#> readr * 2.1.5 2024-01-10 [1] RSPM
#> rlang 1.1.5 2025-01-17 [1] RSPM
#> rmarkdown 2.29 2024-11-04 [1] RSPM
#> sass 0.4.9 2024-03-15 [1] RSPM
#> sessioninfo 1.2.3 2025-02-05 [1] RSPM
#> shiny 1.10.0 2024-12-14 [1] RSPM
#> stringi 1.8.4 2024-05-06 [1] RSPM
#> stringr 1.5.1 2023-11-14 [1] RSPM
#> systemfonts 1.2.1 2025-01-20 [1] RSPM
#> textshaping 1.0.0 2025-01-20 [1] RSPM
#> tibble 3.2.1 2023-03-20 [1] RSPM
#> tidyr 1.3.1 2024-01-24 [1] RSPM
#> tidyselect 1.2.1 2024-03-11 [1] RSPM
#> tiltAddCO2 0.0.0.9002 2025-02-19 [1] Github (2DegreesInvesting/tiltAddCO2@ff86d35)
#> tiltIndicator 0.0.0.9230 2025-02-19 [1] Github (2DegreesInvesting/tiltIndicator@bff1cd9)
#> tiltIndicatorAfter * 0.0.0.9062 2025-02-19 [1] Github (2DegreesInvesting/tiltIndicatorAfter@158246b)
#> tiltToyData * 0.0.0.9204 2025-02-19 [1] Github (2DegreesInvesting/tiltToyData@3a2417a)
#> tiltWorkflows * 0.0.0.9033 2025-02-19 [1] local
#> tzdb 0.4.0 2023-05-12 [1] RSPM
#> vctrs 0.6.5 2023-12-01 [1] RSPM
#> withr 3.0.2 2024-10-28 [1] RSPM
#> xfun 0.50 2025-01-07 [1] RSPM
#> xtable 1.8-4 2019-04-21 [1] RSPM
#> yaml 2.3.10 2024-07-26 [1] RSPM
#>
#> [1] /home/runner/work/_temp/Library
#> [2] /opt/R/4.4.2/lib/R/site-library
#> [3] /opt/R/4.4.2/lib/R/library
#> * ── Packages attached to the search path.
#>
#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Data
This example uses toy datasets by default, but you can set the parameters of this file to use your own data instead.
# Read each input dataset from the `params$input` directory; every file name
# comes from the matching entry in `params`.
europages_companies <- read_csv(path(params$input, params$europages_companies))
ecoinvent_activities <- read_csv(path(params$input, params$ecoinvent_activities))
ecoinvent_europages <- read_csv(path(params$input, params$ecoinvent_europages))
isic <- read_csv(path(params$input, params$isic))
Data specific to this indicator.
Sector profile
For this TILT indicator, compute results both at product and company level.
# Read the two datasets that are specific to this indicator. Their file names
# come from `params`, like the shared inputs read earlier. Without these reads
# `sector_profile_companies` and `sector_profile_any_scenarios` are undefined
# and the call below fails.
sector_profile_companies <- read_csv(
  path(params$input, params$sector_profile_companies)
)
sector_profile_any_scenarios <- read_csv(
  path(params$input, params$sector_profile_any_scenarios)
)

# Compute the sector profile. `profile_sector()` is the version from
# tiltWorkflows (it masks the one from tiltIndicatorAfter), so `companies` is
# processed in chunks.
sector_profile <- profile_sector(
  companies = sector_profile_companies,
  scenarios = sector_profile_any_scenarios,
  europages_companies = europages_companies,
  ecoinvent_activities = ecoinvent_activities,
  ecoinvent_europages = ecoinvent_europages,
  isic = isic
)
#> Warning: Splitting `companies` into 4 chunks.
Results
Overview and save results at each level.
# Take the product-level table out of `sector_profile`, print a preview and
# save it to the output directory.
# `print()` returns its input invisibly, so the same table is piped on to
# `write_csv()`.
sector_profile |>
unnest_product() |>
print() |>
write_csv(path(params$output, "sector_profile_at_product_level.csv"))
#> # A tibble: 304 × 33
#> companies_id company_name country sector_profile reduction_targets scenario year ep_product matched_activity_name matched_reference_pr…¹ unit tilt_sector tilt_subsector multi_match matching_certainty matching_certainty_c…² company_city postcode address main_activity activity_uuid_produc…³ isic_4digit sector_scenario subsector_scenario min_headcount max_headcount ei_geography isic_4digit_name amount_of_distinct_p…⁴ amount_of_distinct_p…⁵ sector_profile_equal…⁶ sector_profile_best_…⁷
#> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <int> <int> <dbl> <dbl>
#> 1 antimonarchy_canine NA NA medium 0.18 1.5C RPS 2030 tent market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' buildings NA NA NA tilt_world NA 1 1 1 0.18
#> 2 antimonarchy_canine NA NA high 0.98 1.5C RPS 2050 tent market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' buildings NA NA NA tilt_world NA 1 1 1 0.98
#> 3 antimonarchy_canine NA NA high 0.4 NZ 2050 2030 tent market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' total residential NA NA tilt_world NA 1 1 1 0.4
#> 4 antimonarchy_canine NA NA high 0.97 NZ 2050 2050 tent market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' total residential NA NA tilt_world NA 1 1 1 0.97
#> 5 celestial_lovebird NA NA medium 0.18 1.5C RPS 2030 table hir… market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' buildings NA NA NA tilt_world NA 1 1 1 0.18
#> 6 celestial_lovebird NA NA high 0.98 1.5C RPS 2050 table hir… market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' buildings NA NA NA tilt_world NA 1 1 1 0.98
#> 7 celestial_lovebird NA NA high 0.4 NZ 2050 2030 table hir… market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' total residential NA NA tilt_world NA 1 1 1 0.4
#> 8 celestial_lovebird NA NA high 0.97 NZ 2050 2050 table hir… market for shed, lar… shed, large, wood, no… m2 constructi… construction … NA NA NA NA NA NA NA 76269c17-78d6-420b-99… '4100' total residential NA NA tilt_world NA 1 1 1 0.97
#> 9 nonphilosophical_lla… NA NA low 0.09 1.5C RPS 2030 surface e… market for deep draw… deep drawing, steel, … kg metals other metals NA NA NA NA NA NA NA 833caa78-30df-4374-90… '2591' industry other industry NA NA tilt_world NA 2 2 0.5 0.09
#> 10 nonphilosophical_lla… NA NA high 0.95 1.5C RPS 2050 surface e… market for deep draw… deep drawing, steel, … kg metals other metals NA NA NA NA NA NA NA 833caa78-30df-4374-90… '2591' industry other industry NA NA tilt_world NA 2 2 0.5 0.95
#> # ℹ 294 more rows
#> # ℹ abbreviated names: ¹matched_reference_product, ²matching_certainty_company_average, ³activity_uuid_product_uuid, ⁴amount_of_distinct_products, ⁵amount_of_distinct_products_matched, ⁶sector_profile_equal_weight, ⁷sector_profile_best_case
#> # ℹ 1 more variable: sector_profile_worst_case <dbl>
# Take the company-level table out of `sector_profile`, print a preview and
# save it to the output directory.
# `print()` returns its input invisibly, so the same table is piped on to
# `write_csv()`.
sector_profile |>
unnest_company() |>
print() |>
write_csv(path(params$output, "sector_profile_at_company_level.csv"))
#> # A tibble: 1,152 × 13
#> companies_id company_name country sector_profile_share sector_profile scenario year matching_certainty_company_average company_city postcode address main_activity reduction_targets_avg
#> <chr> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
#> 1 antimonarchy_canine NA NA 0 high 1.5C RPS 2030 NA NA NA NA NA 0.18
#> 2 antimonarchy_canine NA NA 1 medium 1.5C RPS 2030 NA NA NA NA NA 0.18
#> 3 antimonarchy_canine NA NA 0 low 1.5C RPS 2030 NA NA NA NA NA 0.18
#> 4 antimonarchy_canine NA NA 0 NA 1.5C RPS 2030 NA NA NA NA NA 0.18
#> 5 antimonarchy_canine NA NA 1 high 1.5C RPS 2050 NA NA NA NA NA 0.98
#> 6 antimonarchy_canine NA NA 0 medium 1.5C RPS 2050 NA NA NA NA NA 0.98
#> 7 antimonarchy_canine NA NA 0 low 1.5C RPS 2050 NA NA NA NA NA 0.98
#> 8 antimonarchy_canine NA NA 0 NA 1.5C RPS 2050 NA NA NA NA NA 0.98
#> 9 antimonarchy_canine NA NA 1 high NZ 2050 2030 NA NA NA NA NA 0.4
#> 10 antimonarchy_canine NA NA 0 medium NZ 2050 2030 NA NA NA NA NA 0.4
#> # ℹ 1,142 more rows
The results at product and company level are now saved in the output/ directory.
# NOTE: If other workflows ran before this one, this shows the results of all of them
params$output |> dir_tree()
#> output
#> ├── emissions_profile_at_company_level.csv
#> ├── emissions_profile_at_product_level.csv
#> ├── emissions_profile_upstream_at_company_level.csv
#> ├── emissions_profile_upstream_at_product_level.csv
#> ├── sector_profile_at_company_level.csv
#> ├── sector_profile_at_product_level.csv
#> ├── sector_profile_upstream_at_company_level.csv
#> └── sector_profile_upstream_at_product_level.csv
Cleanup
Here is the cache that allows you to resume after interruptions.
- The number of files is determined by `params$chunks`.
# NOTE: If other workflows ran before this one, this shows the cache of all of them
cache_info()
#> # A tibble: 16 × 18
#> modification_time path type size permissions user group device_id hard_links special_device_id inode block_size blocks flags generation access_time change_time birth_time
#> <dttm> <fs::path> <fct> <fs::bytes> <fs::perms> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dttm> <dttm> <dttm>
#> 1 2025-02-19 22:41:13 /home/runner/.cache/tiltWorkflows/profile_emissions/1.rds file 196.9K rw-r--r-- runner docker 2065 1 0 1660577 4096 400 0 0 2025-02-19 22:41:14 2025-02-19 22:41:13 2025-02-19 22:41:13
#> 2 2025-02-19 22:41:13 /home/runner/.cache/tiltWorkflows/profile_emissions/2.rds file 195.8K rw-r--r-- runner docker 2065 1 0 1660578 4096 392 0 0 2025-02-19 22:41:14 2025-02-19 22:41:13 2025-02-19 22:41:13
#> 3 2025-02-19 22:41:14 /home/runner/.cache/tiltWorkflows/profile_emissions/3.rds file 202K rw-r--r-- runner docker 2065 1 0 1660579 4096 408 0 0 2025-02-19 22:41:14 2025-02-19 22:41:14 2025-02-19 22:41:14
#> 4 2025-02-19 22:41:14 /home/runner/.cache/tiltWorkflows/profile_emissions/4.rds file 201.9K rw-r--r-- runner docker 2065 1 0 1660580 4096 408 0 0 2025-02-19 22:41:14 2025-02-19 22:41:14 2025-02-19 22:41:14
#> 5 2025-02-19 22:41:15 /home/runner/.cache/tiltWorkflows/profile_emissions_upstream/1.rds file 803.6K rw-r--r-- runner docker 2065 1 0 1660582 4096 1608 0 0 2025-02-19 22:41:16 2025-02-19 22:41:15 2025-02-19 22:41:15
#> 6 2025-02-19 22:41:15 /home/runner/.cache/tiltWorkflows/profile_emissions_upstream/2.rds file 178.6K rw-r--r-- runner docker 2065 1 0 1660583 4096 360 0 0 2025-02-19 22:41:16 2025-02-19 22:41:15 2025-02-19 22:41:15
#> 7 2025-02-19 22:41:16 /home/runner/.cache/tiltWorkflows/profile_emissions_upstream/3.rds file 813.2K rw-r--r-- runner docker 2065 1 0 1660584 4096 1632 0 0 2025-02-19 22:41:16 2025-02-19 22:41:16 2025-02-19 22:41:16
#> 8 2025-02-19 22:41:16 /home/runner/.cache/tiltWorkflows/profile_emissions_upstream/4.rds file 812.4K rw-r--r-- runner docker 2065 1 0 1660585 4096 1632 0 0 2025-02-19 22:41:16 2025-02-19 22:41:16 2025-02-19 22:41:16
#> 9 2025-02-19 22:41:19 /home/runner/.cache/tiltWorkflows/profile_sector/1.rds file 97.8K rw-r--r-- runner docker 2065 1 0 1660596 4096 200 0 0 2025-02-19 22:41:20 2025-02-19 22:41:19 2025-02-19 22:41:19
#> 10 2025-02-19 22:41:19 /home/runner/.cache/tiltWorkflows/profile_sector/2.rds file 94.5K rw-r--r-- runner docker 2065 1 0 1660597 4096 192 0 0 2025-02-19 22:41:20 2025-02-19 22:41:19 2025-02-19 22:41:19
#> 11 2025-02-19 22:41:20 /home/runner/.cache/tiltWorkflows/profile_sector/3.rds file 96.2K rw-r--r-- runner docker 2065 1 0 1660598 4096 200 0 0 2025-02-19 22:41:20 2025-02-19 22:41:20 2025-02-19 22:41:20
#> 12 2025-02-19 22:41:20 /home/runner/.cache/tiltWorkflows/profile_sector/4.rds file 94.1K rw-r--r-- runner docker 2065 1 0 1660599 4096 192 0 0 2025-02-19 22:41:20 2025-02-19 22:41:20 2025-02-19 22:41:20
#> 13 2025-02-19 22:41:21 /home/runner/.cache/tiltWorkflows/profile_sector_upstream/1.rds file 100.5K rw-r--r-- runner docker 2065 1 0 1660601 4096 208 0 0 2025-02-19 22:41:21 2025-02-19 22:41:21 2025-02-19 22:41:21
#> 14 2025-02-19 22:41:21 /home/runner/.cache/tiltWorkflows/profile_sector_upstream/2.rds file 119.8K rw-r--r-- runner docker 2065 1 0 1660602 4096 240 0 0 2025-02-19 22:41:21 2025-02-19 22:41:21 2025-02-19 22:41:21
#> 15 2025-02-19 22:41:21 /home/runner/.cache/tiltWorkflows/profile_sector_upstream/3.rds file 101.2K rw-r--r-- runner docker 2065 1 0 1660603 4096 208 0 0 2025-02-19 22:41:21 2025-02-19 22:41:21 2025-02-19 22:41:21
#> 16 2025-02-19 22:41:21 /home/runner/.cache/tiltWorkflows/profile_sector_upstream/4.rds file 135.9K rw-r--r-- runner docker 2065 1 0 1660604 4096 272 0 0 2025-02-19 22:41:21 2025-02-19 22:41:21 2025-02-19 22:41:21
If you want to recompute some result, you must first delete the relevant cache:
library(fs)
library(rappdirs)
# Delete the cache of a single workflow. Replace the placeholder with the name
# of the profile directory you want to recompute.
# The path is the app's cache root plus the sub-directory. Passing
# "tiltWorkflows/<dir>" as the app name gives the same path on Linux and macOS
# but, on Windows, the app name is also used as the default app author, so the
# sub-directory would end up in the wrong place.
dir_delete(
  path(user_cache_dir("tiltWorkflows"), "PROFILE-DIRECTORY-YOU-WANT-TO-DELETE")
)
# DANGER: Or delete the entire default cache directory with
cache_delete()