From b4cc9b8fb09cb43c5a2bc7b662e3cbd924a00ccf Mon Sep 17 00:00:00 2001 From: Falk Benke Date: Mon, 4 Nov 2024 16:05:29 +0100 Subject: [PATCH] move tool_fix_IEA_data_for_Industry_subsectors --- .buildlibrary | 2 +- CITATION.cff | 2 +- DESCRIPTION | 2 +- NAMESPACE | 4 + R/tool_fix_IEA_data_for_Industry_subsectors.R | 689 ++++++++++++++++++ README.md | 6 +- ...ol_fix_IEA_data_for_Industry_subsectors.Rd | 38 + 7 files changed, 737 insertions(+), 6 deletions(-) create mode 100644 R/tool_fix_IEA_data_for_Industry_subsectors.R create mode 100644 man/tool_fix_IEA_data_for_Industry_subsectors.Rd diff --git a/.buildlibrary b/.buildlibrary index 01840006..af559b03 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '600930' +ValidationKey: '801240' AutocreateReadme: yes AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' diff --git a/CITATION.cff b/CITATION.cff index caa988e8..8605e9b2 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,7 +2,7 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'mrindustry: input data generation for the REMIND industry module' -version: 0.3.0 +version: 0.4.0 date-released: '2024-11-04' abstract: The mrindustry packages contains data preprocessing for the REMIND model. authors: diff --git a/DESCRIPTION b/DESCRIPTION index a0ee1cdb..5122c3b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: mrindustry Title: input data generation for the REMIND industry module -Version: 0.3.0 +Version: 0.4.0 Date: 2024-11-04 Authors@R: c( person(given = "Falk", family = "Benke", email = "benke@pik-potsdam.de", diff --git a/NAMESPACE b/NAMESPACE index bf233ad3..b4fa012f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ export(readindustry_subsectors_specific) export(readvanRuijven2016) export(readworldsteel) export(tool_expand_tibble) +export(tool_fix_IEA_data_for_Industry_subsectors) import(GDPuc) import(madrat) import(magclass) @@ -123,6 +124,9 @@ importFrom(quitte,order.levels) importFrom(quitte,overwrite) importFrom(quitte,seq_range) importFrom(quitte,sum_total_) +importFrom(readr,col_character) +importFrom(readr,col_skip) +importFrom(readr,cols) importFrom(readr,read_csv) importFrom(readr,read_delim) importFrom(readr,read_rds) diff --git a/R/tool_fix_IEA_data_for_Industry_subsectors.R b/R/tool_fix_IEA_data_for_Industry_subsectors.R new file mode 100644 index 00000000..50630b8a --- /dev/null +++ b/R/tool_fix_IEA_data_for_Industry_subsectors.R @@ -0,0 +1,689 @@ +#' Apply corrections to IEA data needed for Industry subsectors +#' +#' Apply corrections to IEA data to cope with fragmentary time series and +#' replace outputs from blast furnaces and coke ovens, that are inputs into +#' industry subsectors, by their respective inputs. +#' The corrections done by this function are rather rudimentary and crude. This +#' gets smoothed away in regional aggregation. But do not use the resulting +#' country-level data without additional scrutiny. +#' +#' Use regional or global averages if IEA industry data lists energy use only as +#' "non-specified". +#' Outputs from blast furnaces (`BLFURGS`, `OGASES`) and coke ovens (`OVENCOKE`, +#' `COKEOVGS`, `COALTAR`, `NONCRUDE`), that are inputs into industry subsectors. +#' Used internally in [`calcIO()`] for subtype `output_Industry_subsectors`. +#' +#' @md +#' @param data MAgPIE object containing the IEA Energy Balances data +#' +#' @param ieamatch mapping of IEA product/flow combinations to REMIND +#' `sety`/`fety`/`te` combinations as used in [`calcIO()`] +#' +#' @param threshold minimum share each industry subsector uses of each product. +#' Defaults to 1 %. +#' +#' @return a MAgPIE object +#' +#' @author Michaja Pehl +#' +#' @importFrom assertr not_na assert +#' @importFrom dplyr anti_join group_by inner_join left_join mutate pull rename +#' select summarise +#' @importFrom readr read_delim cols col_skip col_character +#' @importFrom quitte cartesian interpolate_missing_periods overwrite +#' character.data.frame interpolate_missing_periods_ +#' @importFrom rlang .data +#' @importFrom tibble as_tibble +#' @importFrom tidyr complete gather nesting spread +#' @export +tool_fix_IEA_data_for_Industry_subsectors <- function(data, ieamatch, + threshold = 1e-2) { + + . <- NULL + + # replace coke oven and blast furnace outputs ---- + .clean_data <- function(m, keep_zeros = FALSE) { + m %>% + as.data.frame() %>% + as_tibble() %>% + select(iso3c = 'Region', year = 'Year', product = 'Data1', flow = 'Data2', + value = 'Value') %>% + filter(0 != .data$value | keep_zeros) %>% + character.data.frame() %>% + mutate(year = as.integer(.data$year)) + } + + ## flow definitions ---- + IEA_flows <- tribble( + ~summary.flow, ~flow, + # Total Primary Energy Production + 'TPES', 'INDPROD', # primary energy production + 'TPES', 'IMPORTS', + 'TPES', 'EXPORTS', + 'TPES', 'MARBUNK', # international marine bunkers + 'TPES', 'AVBUNK', # international aviation bunkers + NA_character_, 'TRANSFER', # inter-product transfers, product transfers, + # and recycling + 'TPES', 'STOCKCHA', # stock changes + + # Transformation Processes + 'TOTTRANF', 'MAINELEC', # main activity producer electricity plants + 'TOTTRANF', 'AUTOELEC', # autoproducer electricity plants + 'TOTTRANF', 'MAINCHP', # main activity producer CHP plants + 'TOTTRANF', 'AUTOCHP', # autoproducer electricity plants + 'TOTTRANF', 'MAINHEAT', # main activity producer heat plants + 'TOTTRANF', 'AUTOHEAT', # autoproducer heat plants + 'TOTTRANF', 'THEAT', # heat pumps + 'TOTTRANF', 'TBOILER', # electric boilers + 'TOTTRANF', 'TELE', # chemical heat for electricity production + 'TOTTRANF', 'TBLASTFUR', # blast furnaces + 'TOTTRANF', 'TGASWKS', # gas works + 'TOTTRANF', 'TCOKEOVS', # coke ovens + 'TOTTRANF', 'TPATFUEL', # patent fuel plants + 'TOTTRANF', 'TBKB', # peat briquette plants + 'TOTTRANF', 'TREFINER', # oil refineries + 'TOTTRANF', 'TPETCHEM', # petrochemical plants + 'TOTTRANF', 'TCOALLIQ', # coal liquefaction plants + 'TOTTRANF', 'TGTL', # gas-to-liquid plants + 'TOTTRANF', 'TBLENDGAS', # blended natural gas + 'TOTTRANF', 'TCHARCOAL', # charcoal production plants + 'TOTTRANF', 'TNONSPEC', # non-specified transformation + + # Energy Industry Own Use and Losses + 'TOTENGY', 'EMINES', # coal mines + 'TOTENGY', 'EOILGASEX', # oil and gas extraction + 'TOTENGY', 'EBLASTFUR', # blast furnaces + 'TOTENGY', 'EGASWKS', # gas works + 'TOTENGY', 'EBIOGAS', # gasifications plants for biogases + 'TOTENGY', 'ECOKEOVS', # coke ovens + 'TOTENGY', 'EPATFUEL', # patent fuel plants + 'TOTENGY', 'EBKB', # peat briquette plants + 'TOTENGY', 'EREFINER', # oil refineries + 'TOTENGY', 'ECOALLIQ', # coal liquefaction plants + 'TOTENGY', 'ELNG', # liquefaction/regasification plants + 'TOTENGY', 'EGTL', # gas-to-liquied plants + 'TOTENGY', 'EPOWERPLT', # own use in electricity, CHP, and heat + # plants + 'TOTENGY', 'EPUMPST', # pumped storage plants + 'TOTENGY', 'ENUC', # nuclear industry + 'TOTENGY', 'ECHARCOAL', # charcoal production plants + 'TOTENGY', 'ENONSPEC', # non-specified energy industry + 'TOTENGY', 'DISTLOSS', # distribution and transmission losses + + # Final Consumption + 'TFC', 'IRONSTL', # iron and steel + 'TFC', 'CHEMICAL', # chemical and petrochemical + 'TFC', 'NONFERR', # non-ferrous metals + 'TFC', 'NONMET', # non-metallic minerals + 'TFC', 'TRANSEQ', # transport equipment + 'TFC', 'MACHINE', # machinery + 'TFC', 'MINING', # mining and quarrying + 'TFC', 'FOODPRO', # food production + 'TFC', 'PAPERPRO', # paper, pulp, and print + 'TFC', 'WOODPRO', # wood and wood products + 'TFC', 'CONSTRUC', # construction + 'TFC', 'TEXTILES', # textiles + 'TFC', 'INONSPEC', # non-specified industry + 'TFC', 'WORLDAV', # world aviation bunkers + 'TFC', 'DOMESAIR', # domestic aviation + 'TFC', 'ROAD', # road + 'TFC', 'RAIL', # rail + 'TFC', 'PIPELINE', # pipeline transport + 'TFC', 'WORLDMAR', # world marine bunkers + 'TFC', 'DOMESNAV', # domestic navigation + 'TFC', 'TRNONSPE', # non-specified transport + 'TFC', 'RESIDENT', # residential + 'TFC', 'COMMPUB', # commercial and public services + 'TFC', 'AGRICULT', # agriculture/forestry + 'TFC', 'FISHING', # fishing + 'TFC', 'ONONSPEC', # non-specified other consumption + + + 'TOTIND', 'IRONSTL', # iron and steel + 'TOTIND', 'CHEMICAL', # chemical and petrochemical + 'TOTIND', 'NONFERR', # non-ferrous metals + 'TOTIND', 'NONMET', # non-metallic minerals + 'TOTIND', 'TRANSEQ', # transport equipment + 'TOTIND', 'MACHINE', # machinery + 'TOTIND', 'MINING', # mining and quarrying + 'TOTIND', 'FOODPRO', # food production + 'TOTIND', 'PAPERPRO', # paper, pulp, and print + 'TOTIND', 'WOODPRO', # wood and wood products + 'TOTIND', 'CONSTRUC', # construction + 'TOTIND', 'TEXTILES', # textiles + 'TOTIND', 'INONSPEC', # non-specified industry + + # Transport + 'TOTTRANS', 'WORLDAV', # world aviation bunkers + 'TOTTRANS', 'DOMESAIR', # domestic aviation + 'TOTTRANS', 'ROAD', # road + 'TOTTRANS', 'RAIL', # rail + 'TOTTRANS', 'PIPELINE', # pipeline transport + 'TOTTRANS', 'WORLDMAR', # world marine bunkers + 'TOTTRANS', 'DOMESNAV', # domestic navigation + 'TOTTRANS', 'TRNONSPE', # non-specified transport + + # Other Consumption + 'TOTOTHER', 'RESIDENT', # residential + 'TOTOTHER', 'COMMPUB', # commercial and public services + 'TOTOTHER', 'AGRICULT', # agriculture/forestry + 'TOTOTHER', 'FISHING', # fishing + 'TOTOTHER', 'ONONSPEC', # non-specified other consumption + + # Non-Energy Use + 'NONENUSE', 'NEINTREN', # non-energy use in industry/transformation/ + # energy + NA_character_, 'NECHEM', # non-energy use chemical/petrochemical + 'NONENUSE', 'NETRANS', # non-energy use in transport + 'NONENUSE', 'NEOTHER', # non-energy use in other + + # Electricity Output + 'ELOUTPUT', 'ELMAINE', # main activity producer electricity plants + 'ELOUTPUT', 'ELAUTOE', # autoproducer electricity plants + 'ELOUTPUT', 'ELMAINC', # main activity producer CHP plants + 'ELOUTPUT', 'ELAUTOC', # autoproducer CHP plants + + # Heat Output + 'HEATOUT', 'HEMAINC', # main activity producer CHP plants + 'HEATOUT', 'HEAUTOC', # autoproducer CHP plants + 'HEATOUT', 'HEMAINH', # main activity producer heat plants + 'HEATOUT', 'HEAUTOH' # autoproducer heat plants + ) + + base_flows <- unique(IEA_flows$flow) + summary_flows <- unique(na.omit(IEA_flows$summary.flow)) + all_flows <- c(base_flows, summary_flows) + + ### blast furnace flows to be replaced ---- + # all transformation, energy system and final consumption flows, except for + # those related to blast furnaces + flow_BLASTFUR_to_replace <- setdiff(all_flows, c('EBLASTFUR', 'TBLASTFUR')) + + ### coke oven flows to be replaced ---- + # all transformation, energy system and final consumption flows, except for + # those related to coke ovens + flow_COKEOVS_to_replace <- setdiff(all_flows, c('ECOKEOVS', 'TCOKEOVS')) + + ## blast furnace data ---- + # all products in/out of blast furnace transformation and energy demand, except + # summary flows 'TOTAL' and 'MRENEW' + data_BLASTFUR <- data %>% + `[`(,, c('EBLASTFUR', 'TBLASTFUR'), pmatch = 'right') %>% + `[`(,, c('TOTAL', 'MRENEW'), pmatch = 'left', invert = TRUE) %>% + .clean_data() %>% + group_by(!!!syms(c('iso3c', 'year', 'product'))) %>% + summarise(value = sum(.data$value), .groups = 'drop') + + ### blast furnace inputs ---- + # inputs into transformation/energy system are negative + data_BLASTFUR_inputs <- data_BLASTFUR %>% + filter(0 > .data$value) + + ### blast furnace outputs ---- + # outputs from transformation are positive + data_BLASTFUR_outputs <- data_BLASTFUR %>% + filter(0 < .data$value) + + ### blast furnace output products ---- + # products blast furnaces supply to other flows + outputs_BLASTFUR <- data_BLASTFUR_outputs %>% + pull('product') %>% + unique() + + ### product/flow to be replaced ---- + # all blast furnace outputs and flows to be replaced, that are actually present + # in the data + product_flow_BLASTFUR_to_replace <- intersect( + cartesian(outputs_BLASTFUR, flow_BLASTFUR_to_replace), + getNames(data)) + + ### blast furnace product use ---- + data_BLASTFUR_use <- data %>% + `[`(,, product_flow_BLASTFUR_to_replace) %>% + .clean_data() + + ## blast furnace replacement data ---- + # outputs are replaced joule-by-joule with inputs, according to the input shares + # right_join() filters out countries/years that do not use blast furnace + # products + data_BLASTFUR_replacement <- right_join( + data_BLASTFUR_inputs %>% + group_by(!!!syms(c('iso3c', 'year'))) %>% + mutate(factor = .data$value / sum(.data$value)) %>% + ungroup() %>% + select(-'value'), + + data_BLASTFUR_use %>% + select(-'product'), + + c('iso3c', 'year') + ) %>% + # FIXME: filter countries/years that have no inputs into blast furnaces, yet + # outputs from them and use of blast furnace products (e.g. ISR 1973) + filter(!is.na(.data$product)) %>% + assert(not_na, everything(), + description = 'Only valid blast furnace replacement data') %>% + mutate(value = .data$value * .data$factor) %>% + group_by(!!!syms(c('iso3c', 'year', 'product', 'flow'))) %>% + summarise(value = sum(.data$value), .groups = 'drop') + + ## coke oven data ---- + # all products in/out of coke oven transformation and energy demand, except + # summary flows 'TOTAL' and 'MRENEW' + data_COKEOVS <- data %>% + `[`(,, c('ECOKEOVS', 'TCOKEOVS'), pmatch = 'right') %>% + `[`(,, c('TOTAL', 'MRENEW'), pmatch = 'left', invert = TRUE) %>% + .clean_data() %>% + group_by(!!!syms(c('iso3c', 'year', 'product'))) %>% + summarise(value = sum(.data$value), .groups = 'drop') + + #### apply blast furnace replacement ---- + # Coke ovens and blast furnaces can be both inputs and outputs to one another at + # the same time. To untangle this, we first replace blast furnace outputs that + # are inputs into coke ovens by coke oven outputs, which are netted with the + # direct outputs (here), and then replace coke oven outputs that are blast + # furnace inputs by coke oven inputs (further below). + data_COKEOVS <- bind_rows( + data_COKEOVS %>% + filter(!.data$product %in% outputs_BLASTFUR), + + data_BLASTFUR_replacement %>% + filter(.data$flow %in% c('ECOKEOVS', 'TCOKEOVS')) %>% + select(-'flow') + ) %>% + group_by(!!!syms(c('iso3c', 'year', 'product'))) %>% + summarise(value = sum(.data$value), .groups = 'drop') + + ### coke oven inputs ---- + # inputs into transformation/energy system are negative + data_COKEOVS_inputs <- data_COKEOVS %>% + filter(0 > .data$value) + + ### coke oven outputs ---- + # outputs from transformation are positive + data_COKEOVS_outputs <- data_COKEOVS %>% + filter(0 < .data$value) + + ### coke oven output products ---- + # products coke ovens supply to other flows + outputs_COKEOVS <- data_COKEOVS_outputs %>% + pull('product') %>% + unique() + + ### product/flows to be replaced ---- + # all coke oven outputs and flows to be replaced, that are actually present in + # the data + product_flow_COKEOVS_to_replace <- intersect( + cartesian(outputs_COKEOVS, flow_COKEOVS_to_replace), + getNames(data)) + + ### coke oven product use ---- + data_COKEOVS_use <- data %>% + `[`(,, product_flow_COKEOVS_to_replace) %>% + .clean_data() + + ## coke oven replacement data ---- + # outputs are replaced joule-by-joule with inputs, according to the input shares + # right_join() filters out countries/years that do not use coke oven products + data_COKEOVS_replacement <- right_join( + data_COKEOVS_inputs %>% + group_by(!!!syms(c('iso3c', 'year'))) %>% + mutate(factor = .data$value / sum(.data$value)) %>% + ungroup() %>% + select(-'value'), + + data_COKEOVS_use %>% + select(-'product'), + + c('iso3c', 'year') + ) %>% + # FIXME: filter countries/year that have no inputs into coke ovens but use + # coke oven outputs (which most likely are imported) + filter(!is.na(.data$product)) %>% + assert(not_na, everything(), + description = 'Only valid coke oven replacement data') %>% + mutate(value = .data$value * .data$factor) %>% + select('iso3c', 'year', 'product', 'flow', 'value') + + ## coke oven loss data ---- + # coke oven losses (true losses from ECOKEOVS and transformation energy from + # TCOKEOVS) are allotted to the IRONSTL sector + # losses are the difference of inputs and outputs, weighted by input shares + # right_join() filters out countries/years that do not use coke oven products + data_COKEOVS_loss <- right_join( + data_COKEOVS_inputs, + + data_COKEOVS_outputs %>% + group_by(!!!syms(c('iso3c', 'year'))) %>% + summarise(output = sum(.data$value), .groups = 'drop'), + + c('iso3c', 'year') + ) %>% + # FIXME: filter countries/year that have no inputs into coke ovens but use + # coke oven outputs (which most likely are imported) + filter(!is.na(.data$product)) %>% + assert(not_na, everything(), + description = 'Only valid coke oven loss data') %>% + group_by(!!!syms(c('iso3c', 'year'))) %>% + mutate(value = (sum(-.data$value) - .data$output) + * .data$value / sum(.data$value), + flow = 'IRONSTL') %>% + ungroup() %>% + select('iso3c', 'year', 'product', 'flow', 'value') + + ## recalculate blast furnace inputs w/ coke oven replacements ---- + + #### apply coke oven replacement ---- + # Coke ovens and blast furnaces can be both inputs and outputs to one another at + # the same time. To untangle this, we first replace blast furnace outputs that + # are inputs into coke ovens by coke oven outputs, which are netted with the + # direct outputs (above), and then replace coke oven outputs that are blast + # furnace inputs by coke oven inputs (here). + data_BLASTFUR <- bind_rows( + data_BLASTFUR %>% + filter(!.data$product %in% outputs_COKEOVS), + + data_COKEOVS_replacement %>% + filter(.data$flow %in% c('EBLATFUR', 'TBLASTFUR')) %>% + select(-'flow') + ) %>% + group_by(!!!syms(c('iso3c', 'year', 'product'))) %>% + summarise(value = sum(.data$value), .groups = 'drop') + + ### blast furnace inputs ---- + # inputs into transformation/energy system are negative + data_BLASTFUR_inputs <- data_BLASTFUR %>% + filter(0 > .data$value) + + ### blast furnace replacement data ---- + # outputs are replaced joule-by-joule with inputs, according to the input shares + # right_join() filters out countries/years that do not use blast furnace + # products + data_BLASTFUR_replacement <- right_join( + data_BLASTFUR_inputs %>% + group_by(!!!syms(c('iso3c', 'year'))) %>% + mutate(factor = .data$value / sum(.data$value)) %>% + ungroup() %>% + select(-'value'), + + data_BLASTFUR_use %>% + select(-'product'), + + c('iso3c', 'year') + ) %>% + # FIXME: filter countries/years that have no inputs into blast furnaces, yet + # outputs from them and use of blast furnace products (e.g. ISR 1973) + filter(!is.na(.data$product)) %>% + assert(not_na, everything(), + description = 'Only valid blast furnace replacement data') %>% + mutate(value = .data$value * .data$factor) %>% + group_by(!!!syms(c('iso3c', 'year', 'product', 'flow'))) %>% + summarise(value = sum(.data$value), .groups = 'drop') + + ## blast furnace loss data ---- + # blast furnace losses (true losses from EBLASTFUR and transformation energy + # from TBLASTFUR) are allotted to the IRONSTL sector + # losses are the difference of inputs and outputs, weighted by input shares + # right_join() filters out countries/years that do not use blast furnace + # products + data_BLASTFUR_loss <- right_join( + data_BLASTFUR_inputs, + + data_BLASTFUR_outputs %>% + group_by(!!!syms(c('iso3c', 'year'))) %>% + summarise(output = sum(.data$value), .groups = 'drop'), + + c('iso3c', 'year') + ) %>% + # FIXME: filter countries/years that have no inputs into blast furnaces, yet + # outputs from them and use of blast furnace products (e.g. ISR 1973) + filter(!is.na(.data$product)) %>% + assert(not_na, everything(), + description = 'Only valid blast furnace loss data') %>% + group_by(!!!syms(c('iso3c', 'year'))) %>% + mutate(value = (sum(-.data$value) - .data$output) + * .data$value / sum(.data$value), + flow = 'IRONSTL') %>% + ungroup() %>% + select('iso3c', 'year', 'product', 'flow', 'value') + + ## replace coke oven and blast furnace products ---- + data_replace <- bind_rows( + # filter already replaced data + data_COKEOVS_replacement %>% + filter(!.data$flow %in% c('EBLATFUR', 'TBLASTFUR')), + + data_BLASTFUR_replacement %>% + filter(!.data$flow %in% c('ECOKEOVS', 'TCOKEOVS')), + + data_COKEOVS_loss %>% + sum_total_('product', name = 'TOTAL'), + + data_BLASTFUR_loss %>% + sum_total_('product', name = 'TOTAL') + ) %>% + group_by(!!!syms(setdiff(colnames(.), 'value'))) %>% + summarise(value = sum(.data$value), .groups = 'drop') %>% + arrange(!!!syms(c('iso3c', 'year', 'product', 'flow'))) %>% + as.magpie(spatial = 1, temporal = 2, data = ncol(.)) + + data_replace[is.na(data_replace)] <- 0 + + regions_keep <- sort(getRegions(data)) + years_keep <- sort(getYears(data)) + names_keep <- sort(setdiff(getNames(data), + c(product_flow_COKEOVS_to_replace, + product_flow_BLASTFUR_to_replace))) + + regions_replace <- sort(getRegions(data_replace)) + years_replace <- sort(getYears(data_replace)) + names_replace <- sort(getNames(data_replace)) + + data_fixed <- new.magpie(cells_and_regions = regions_keep, years = years_keep, + names = unique(c(names_keep, names_replace)), + fill = 0) + + data_fixed[regions_keep,years_keep,names_keep] <- data %>% + `[`(regions_keep, years_keep, names_keep) + + data_fixed[regions_replace,years_replace,names_replace] <- ( + data_fixed[regions_replace,years_replace,names_replace] + + data_replace[regions_replace,years_replace,names_replace] + ) + + data <- data_fixed + + # all industry subsector flows + flows_to_fix <- c('IRONSTL', 'CHEMICAL', 'NONFERR', 'NONMET', 'TRANSEQ', + 'MACHINE','MINING', 'FOODPRO', 'PAPERPRO', 'WOODPRO', + 'CONSTRUC', 'TEXTILES') + + # all products associated with those flows + products_to_fix <- ieamatch %>% + filter(.data$iea_flows %in% flows_to_fix) %>% + getElement('iea_product') %>% + unique() + + region_mapping <- toolGetMapping(name = 'regionmapping_21_EU11.csv', + type = 'regional', + where = 'mappingfolder') %>% + as_tibble() %>% + select('iso3c' = .data$CountryCode, 'region' = .data$RegionCode) + + # extend industry subsector time series ---- + # subset of data containing industry subsector products and flows + data_industry <- data %>% + `[`(,,intersect(getNames(data), + cartesian(products_to_fix, + c(flows_to_fix, 'TOTIND', 'INONSPEC')))) %>% + as.data.frame() %>% + as_tibble() %>% + select(iso3c = 'Region', year = 'Year', product = 'Data1', flow = 'Data2', + value = 'Value') %>% + mutate(year = as.integer(as.character(.data$year))) %>% + filter(0 != .data$value) %>% + inner_join(region_mapping, 'iso3c') %>% + assert(not_na, .data$region) + + ## apply five-year moving average ---- + data_industry <- data_industry %>% + group_by(.data$iso3c, .data$region, .data$product, .data$flow) %>% + arrange(.data$year) %>% + mutate(value = zoo::rollapply( + # pad data with two leading and trailing NAs + data = c(NA, NA, .data$value, NA, NA), + width = 5, + # ignoring NAs in mean() stumps the mean on the edges to four/three years + FUN = function(x) { mean(x, na.rm = TRUE) })) %>% + ungroup() + + # all products that use less then 1 % of total energy outside of non-specified + # industry are 'suspicious' and will be fixed + data_to_fix <- data_industry %>% + filter(.data$flow %in% c('TOTIND', 'INONSPEC')) %>% + spread(.data$flow, .data$value) %>% + filter(1 - .data$INONSPEC / .data$TOTIND < 1e-2) %>% + select('iso3c', 'region', 'year', 'product', 'TOTIND') + + # use all non-suspicious data to calculate regional and global averages + data_for_fixing <- anti_join( + data_industry %>% + filter('TOTIND' != .data$flow), + + data_to_fix %>% + select(-.data$TOTIND), + + c('iso3c', 'region', 'year', 'product') + ) %>% + as_tibble() + + data_for_fixing <- full_join( + # compute global averages + data_for_fixing %>% + group_by(.data$year, .data$product, .data$flow) %>% + summarise(value = sum(.data$value), .groups = 'drop_last') %>% + mutate(global_share = .data$value / sum(.data$value)) %>% + ungroup() %>% + select(-.data$value) %>% + # and expand to all regions + mutate(region = NA_character_) %>% + complete(nesting(!!!syms(c('year', 'product', 'flow', 'global_share'))), + region = unique(region_mapping$region)), + + # compute regional averages + data_for_fixing %>% + group_by(.data$year, .data$region, .data$product, .data$flow) %>% + summarise(value = sum(.data$value), .groups = 'drop_last') %>% + mutate(regional_share = .data$value / sum(.data$value)) %>% + ungroup() %>% + select(-.data$value), + + c('year', 'region', 'product', 'flow') + ) %>% + # use regional averages if available, global averages otherwise + mutate(value = ifelse(!is.na(.data$regional_share), .data$regional_share, + .data$global_share)) %>% + select(-.data$regional_share, -.data$global_share) %>% + interpolate_missing_periods_( + periods = list(year = sub('^y([0-9]{4})$', '\\1', getYears(data)) %>% + as.integer() %>% + sort()), + expand.values = TRUE, method = 'linear') + + # calculated fixed data + data_industry_fixed <- left_join( + data_to_fix, + data_for_fixing, + c('region', 'year', 'product') + ) %>% + # replace "suspicious" data with averages + mutate(value = .data$TOTIND * .data$value) %>% + select(.data$iso3c, .data$region, .data$year, .data$product, .data$flow, + .data$value) %>% + assert(not_na, .data$value) %>% + overwrite(data_industry) + + # redistribute at least of each product into each subsector ---- + data_industry_fixed <- data_industry_fixed %>% + complete(nesting(!!!syms(c('iso3c', 'region', 'year', 'product'))), + flow = c(flows_to_fix, 'INONSPEC'), + fill = list(value = 0)) %>% + group_by(.data$iso3c, .data$year, .data$product) %>% + # which flow belongs to which subsector? + right_join( + tribble( + ~subsector, ~flow, + 'cement', 'NONMET', + 'chemicals', 'CHEMICAL', + 'steel', 'IRONSTL') %>% + complete(flow = c(flows_to_fix, 'INONSPEC'), + fill = list(subsector = 'otherInd')), + + 'flow' + ) %>% + # compute subsector totals + group_by(.data$subsector, .add = TRUE) %>% + mutate(subsector.total = sum(.data$value), + subsector.count = n()) %>% + ungroup(.data$subsector) %>% + mutate( + # each subsector consumes at least of the total consumption of + # each product (with the exception of heat, which is only consumed in the + # otherInd subsector) + subsector.min = ifelse('HEAT' == .data$product, 0, + threshold * sum(.data$value)), + # if total subsector consumption is below the minimum, consumption must be + # added + subsector.add = pmax(0, .data$subsector.min - .data$subsector.total), + # each flow gets consumption added according to its share in total + # subsector consumption + flow.add = ifelse(0 != .data$subsector.total, + ( .data$subsector.add + / .data$subsector.total + * .data$value + ), + .data$subsector.add / .data$subsector.count), + # if the additional flow is zero, consumption has to be subtracted from\ + # this flow, in relation to its share of all flows with + # more-than-threshold consumption + flow.add = ifelse(0 != .data$flow.add, .data$flow.add, + ( -sum(.data$flow.add) + * .data$value + / sum(.data$value[0 == .data$flow.add]) + )), + value = .data$value + .data$flow.add) %>% + ungroup() %>% + select('iso3c', 'region', 'year', 'product', 'flow', 'value') + + # replace and append fixed data ---- + data_industry_fixed_overwrite <- data_industry_fixed %>% + semi_join( + data_industry, + + c('iso3c', 'region', 'year', 'product', 'flow') + ) %>% + select('iso3c', 'year', 'product', 'flow', 'value') %>% + as.magpie(spatial = 1, temporal = 2, data = 5) + + data_industry_fixed_append <- data_industry_fixed %>% + anti_join( + data_industry, + + c('iso3c', 'region', 'year', 'product', 'flow') + ) %>% + select('iso3c', 'year', 'product', 'flow', 'value') %>% + complete(nesting(!!!syms(c('product', 'flow'))), + iso3c = getRegions(data), + year = getYears(data, as.integer = TRUE), + fill = list(value = 0)) %>% + select('iso3c', 'year', 'product', 'flow', 'value') %>% + as.magpie(spatial = 1, temporal = 2, data = 5) + + data[getRegions(data_industry_fixed_overwrite), + getYears(data_industry_fixed_overwrite), + getNames(data_industry_fixed_overwrite)] <- data_industry_fixed_overwrite + + data <- mbind(data, data_industry_fixed_append) + + return(data) +} diff --git a/README.md b/README.md index b2249185..a21d93d1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # input data generation for the REMIND industry module -R package **mrindustry**, version **0.3.0** +R package **mrindustry**, version **0.4.0** [![CRAN status](https://www.r-pkg.org/badges/version/mrindustry)](https://cran.r-project.org/package=mrindustry) [![R build status](https://github.com/pik-piam/mrindustry/workflows/check/badge.svg)](https://github.com/pik-piam/mrindustry/actions) [![codecov](https://codecov.io/gh/pik-piam/mrindustry/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/mrindustry) [![r-universe](https://pik-piam.r-universe.dev/badges/mrindustry)](https://pik-piam.r-universe.dev/builds) @@ -39,7 +39,7 @@ In case of questions / problems please contact Falk Benke To cite package **mrindustry** in publications use: -Benke F, Dürrwächter J, Rodrigues R, Moreno-Leiva S, Baumstark L, Pehl M (2024). _mrindustry: input data generation for the REMIND industry module_. R package version 0.3.0, . +Benke F, Dürrwächter J, Rodrigues R, Moreno-Leiva S, Baumstark L, Pehl M (2024). _mrindustry: input data generation for the REMIND industry module_. R package version 0.4.0, . A BibTeX entry for LaTeX users is @@ -48,7 +48,7 @@ A BibTeX entry for LaTeX users is title = {mrindustry: input data generation for the REMIND industry module}, author = {Falk Benke and Jakob Dürrwächter and Renato Rodrigues and Simón Moreno-Leiva and Lavinia Baumstark and Michaja Pehl}, year = {2024}, - note = {R package version 0.3.0}, + note = {R package version 0.4.0}, url = {https://github.com/pik-piam/mrindustry}, } ``` diff --git a/man/tool_fix_IEA_data_for_Industry_subsectors.Rd b/man/tool_fix_IEA_data_for_Industry_subsectors.Rd new file mode 100644 index 00000000..17fdac0f --- /dev/null +++ b/man/tool_fix_IEA_data_for_Industry_subsectors.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tool_fix_IEA_data_for_Industry_subsectors.R +\name{tool_fix_IEA_data_for_Industry_subsectors} +\alias{tool_fix_IEA_data_for_Industry_subsectors} +\title{Apply corrections to IEA data needed for Industry subsectors} +\usage{ +tool_fix_IEA_data_for_Industry_subsectors(data, ieamatch, threshold = 0.01) +} +\arguments{ +\item{data}{MAgPIE object containing the IEA Energy Balances data} + +\item{ieamatch}{mapping of IEA product/flow combinations to REMIND +\code{sety}/\code{fety}/\code{te} combinations as used in \code{\link[=calcIO]{calcIO()}}} + +\item{threshold}{minimum share each industry subsector uses of each product. +Defaults to 1 \%.} +} +\value{ +a MAgPIE object +} +\description{ +Apply corrections to IEA data to cope with fragmentary time series and +replace outputs from blast furnaces and coke ovens, that are inputs into +industry subsectors, by their respective inputs. +The corrections done by this function are rather rudimentary and crude. This +gets smoothed away in regional aggregation. But do not use the resulting +country-level data without additional scrutiny. +} +\details{ +Use regional or global averages if IEA industry data lists energy use only as +"non-specified". +Outputs from blast furnaces (\code{BLFURGS}, \code{OGASES}) and coke ovens (\code{OVENCOKE}, +\code{COKEOVGS}, \code{COALTAR}, \code{NONCRUDE}), that are inputs into industry subsectors. +Used internally in \code{\link[=calcIO]{calcIO()}} for subtype \code{output_Industry_subsectors}. +} +\author{ +Michaja Pehl +}