Skip to contents

Setup

Use the required packages, read your loanbook and tilt datasets, check your loanbook and suggest matches. For a gentle walkthrough see Get started.

library(dplyr, warn.conflicts = FALSE)
library(vroom)
library(tilt.company.match)

loanbook <- vroom(example_file("demo_loanbook.csv"), show_col_types = FALSE)
tilt <- vroom(example_file("demo_tilt.csv"), show_col_types = FALSE)

loanbook %>% check_loanbook()
#> Found duplicate(s) on columns company_name, postcode, country of the data set.
#>  Found for the company Peasant Peter, postcode: 01234, country: germany
#>  Please check if these duplicates are intended and have an unique id.

to_edit <- loanbook %>% suggest_match(tilt)

# Edited file after manual validation (File replacement from the user is required)
edited <- vroom(example_file("demo_matched.csv"), show_col_types = FALSE)
edited
#> # A tibble: 18 × 13
#>       id compa…¹ postc…² country misc_…³ compa…⁴ id_tilt compa…⁵ misc_…⁶ compa…⁷
#>    <dbl> <chr>   <chr>   <chr>   <chr>   <chr>     <dbl> <chr>   <chr>   <chr>  
#>  1     1 Peasan… 01234   germany A       peasan…       1 Peasan… A       peasan…
#>  2     1 Peasan… 01234   germany A       peasan…       2 Peasan… Z       peasan…
#>  3     1 Peasan… 01234   germany A       peasan…       4 Peasan… B       peasan…
#>  4     2 Peasan… 01234   germany Z       peasan…       1 Peasan… A       peasan…
#>  5     2 Peasan… 01234   germany Z       peasan…       2 Peasan… Z       peasan…
#>  6     2 Peasan… 01234   germany Z       peasan…       4 Peasan… B       peasan…
#>  7     3 Peasan… 11234   germany Z       peasan…       3 Peasan… Z       peasan…
#>  8     4 Peasan… 01234   germany Z       peasan…       4 Peasan… B       peasan…
#>  9     4 Peasan… 01234   germany Z       peasan…       1 Peasan… A       peasan…
#> 10     4 Peasan… 01234   germany Z       peasan…       2 Peasan… Z       peasan…
#> 11     5 Bread … 23456   germany C       breadb…       5 The Br… C       thebre…
#> 12     6 Flower… 34567   germany Z       flower…       7 Flower… F       flower…
#> 13     6 Flower… 34567   germany Z       flower…       6 Flower… D       flower…
#> 14     7 Screwd… 45678   germany D       screwd…      NA NA      NA      NA     
#> 15     8 Screwd… 45678   germany Z       screwd…      NA NA      NA      NA     
#> 16     9 John M… 56789   germany E       johnme…       8 John a… E       johnja…
#> 17    10 John M… 55555   germany Y       johnme…      NA NA      NA      NA     
#> 18    11 John M… 55555   norway  Y       johnme…      NA NA      NA      NA     
#> # … with 3 more variables: string_sim <dbl>, suggest_match <lgl>,
#> #   accept_match <lgl>, and abbreviated variable names ¹​company_name,
#> #   ²​postcode, ³​misc_info, ⁴​company_alias, ⁵​company_name_tilt, ⁶​misc_info_tilt,
#> #   ⁷​company_alias_tilt

Share of matched companies after manual validation from total loanbook companies

accepted <- edited %>%
  filter(accept_match)

matched_rows <- accepted %>% nrow()

total_rows <- loanbook %>%
  distinct(id) %>%
  nrow()

matched_share <- matched_rows / total_rows
matched_share
#> [1] 0.3333333

Number and share of matched companies classified by misc_info from total loanbook companies

Please replace the misc_info column with variables like sectors, headcount or similar classifier to calculate the share based on that classifier.

x_misc <- loanbook %>% count(misc_info)
y_misc <- accepted %>% count(misc_info)
misc_share <-
  left_join(x_misc, y_misc, by = c("misc_info"), suffix = c("_total", "_merged")) %>%
  mutate(n_share = n_merged / n_total)

Sum of loan values and their share from total value after classified by misc_info

The loan_amount column is added as a sample column to calculate the loan exposure output.

# Sample column and dataframe
accepted_loan <- accepted %>% mutate(loan_amount = c(1000))

# Please replace the name of sample column `loan_amount` and the `accepted_loan`
# dataframe with the original loan column name and loanbook dataframe!
exposure_sum <- accepted_loan %>%
  group_by(misc_info) %>%
  summarise(sum_expo = sum(loan_amount)) %>%
  mutate(sum_share = sum_expo / sum(sum_expo))