Skip to content

Commit

Permalink
Merge pull request #358 from ONSdigital/RDRP-787_test_export
Browse files Browse the repository at this point in the history
Rdrp 787 test export
  • Loading branch information
JenCheshire authored Oct 22, 2024
2 parents 23098b9 + 545784c commit 79fab5a
Show file tree
Hide file tree
Showing 8 changed files with 442 additions and 16 deletions.
7 changes: 7 additions & 0 deletions export_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
import os

from importlib import reload

# Change to the project repository location
my_wd = os.getcwd()
my_repo = "research-and-development"
if not my_wd.endswith(my_repo):
os.chdir(my_repo)

from src.outputs import export_files

reload(export_files)
Expand Down
103 changes: 103 additions & 0 deletions export_mods_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""Manual test script that exercises the src.utils.s3_mods helper functions."""
import os

# Change to the project repository location.
# NOTE(review): presumably this has to run before the `src` imports below so
# that the `src` package resolves from the repository root -- confirm.
my_wd = os.getcwd()
my_repo = "research-and-development"
if not my_wd.endswith(my_repo):
    # my_repo is a bare directory name, so it is resolved relative to the
    # current working directory.
    os.chdir(my_repo)

from src.utils.singleton_boto import SingletonBoto

# Minimal, hard-coded configuration handed to SingletonBoto.get_client below.
config = {
    "s3": {
        "ssl_file": "/etc/pki/tls/certs/ca-bundle.crt",
        "s3_bucket": "onscdp-dev-data01-5320d6ca"
    }
}

# The client is created before s3_mods is imported.
# NOTE(review): presumably s3_mods expects the SingletonBoto client to exist
# already when it is imported -- confirm before reordering these lines.
boto3_client = SingletonBoto.get_client(config)
import src.utils.s3_mods as mods


if __name__ == "__main__":
    # Manual smoke test of the s3_mods helpers. Each section calls one helper
    # and prints its result so it can be checked by eye. The sections run in
    # order and later ones reuse files written by earlier ones.

    my_dir = "/bat/res_dev/project_data/2023_surveys/BERD/01_staging/staging_qa/full_responses_qa/"
    my_path = my_dir + "2023_staged_BERD_full_responses_24-10-02_v20.csv"
    # to_delete_path = my_dir + "2023_staged_BERD_full_responses_test_to_delete.csv"

    # Testing rd_file_size
    my_size = mods.rd_file_size(my_path)
    print(f"File size is {my_size}")

    # Testing rd_md5sum: report a mismatch explicitly instead of staying
    # silent. The repr of the actual value also reveals its type.
    my_sum = mods.rd_md5sum(my_path)
    expected_output = "ea94424aceecf11c8a70d289e51c34ea"
    if expected_output == my_sum:
        print("Same md5sum")
    else:
        print(f"Different md5sum: expected {expected_output!r}, got {my_sum!r}")

    # Testing rd_isdir
    root_dir = "/bat"
    response = mods.rd_isdir(root_dir)

    print("Got response")
    print(response)

    # Testing rd_isfile
    response = mods.rd_isfile(my_path)
    print(response)

    # Checking that rd_stat_size works for files and directories
    file_size = mods.rd_stat_size(my_path)
    print(f"File {my_path} size is {file_size} bytes.")

    dir_size = mods.rd_stat_size(my_dir)
    print(f"Directory {my_dir} size is {dir_size} bytes.")

    # Testing rd_read_header
    response = mods.rd_read_header(my_path)
    print(response)

    # Testing rd_write_string_to_file
    out_path = "/bat/res_dev/project_data/new_write_string_test_2.txt"
    content = "New content"
    mods.rd_write_string_to_file(content.encode(encoding="utf-8"), out_path)
    # The script continues after this step, so report only what was done.
    print(f"String written to {out_path}")

    # Testing rd_copy_file: copies the file written in the previous step
    src_path = out_path
    dst_path = "/bat/res_dev/"
    success = mods.rd_copy_file(src_path, dst_path)
    if success:
        print("File copied successfully")
    else:
        print("File not copied successfully")

    # Testing rd_move_file: moves the copy made in the previous step back
    src_path = "/bat/res_dev/new_write_string_test_2.txt"
    dst_path = "/bat/res_dev/project_data/"
    success = mods.rd_move_file(src_path, dst_path)
    if success:
        print("File moved successfully")
    else:
        print("File not moved successfully")

    # Testing rd_search_file
    # NOTE(review): unlike every other path in this script, this one has no
    # leading slash -- confirm which form rd_search_file expects.
    dir_path = "bat/res_dev/project_data/2023_surveys/BERD/01_staging/staging_qa/full_responses_qa/"
    ending = "24-10-02_v20.csv"

    found_file = mods.rd_search_file(dir_path, ending)
    print(f"Found file: {found_file}")

    # Deleting a file (disabled; also uncomment to_delete_path above to use)
    # status = mods.rd_delete_file(to_delete_path)
    # if status:
    #     print(f"File {to_delete_path} successfully deleted")

    # Testing read_excel (disabled)
    # my_path = "bat/res_dev/project_data/test_excel_gz.xlsx"
    # df = mods.read_excel(my_path)
    # print(df.head())

2 changes: 1 addition & 1 deletion src/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.1.2"
__version__ = "2.2.0"
2 changes: 1 addition & 1 deletion src/dev_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ global:
table_config: "SingleLine"
# Environment settings
dev_test : False
platform: network #whether to load from hdfs, network (Windows) or s3 (CDP)
platform: network # whether to load from hdfs, network (Windows) or s3 (CDP)
load_from_feather: False
runlog_writer:
write_csv: True # Write the runlog to a CSV file
Expand Down
10 changes: 6 additions & 4 deletions src/outputs/export_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def check_files_exist(file_list: List, config: dict, isfile: callable):
for file in file_list:
file_path = Path(file) # Changes to path if str
OutgoingLogger.debug(f"Using {platform} isfile function")
if not isfile(file_path):
if not isfile(str(file_path)):
OutgoingLogger.error(
f"File {file} does not exist. Check existence and spelling"
)
Expand All @@ -152,7 +152,7 @@ def transfer_files(source, destination, method, logger, copy_files, move_files):
"""
transfer_func = {"copy": copy_files, "move": move_files}[method]
past_tense = {"copy": "copied", "move": "moved"}[method]
transfer_func(source, destination)
transfer_func(str(source), destination)

logger.info(f"Files {source} successfully {past_tense} to {destination}.")

Expand Down Expand Up @@ -227,10 +227,12 @@ def run_export(user_config_path: str, dev_config_path: str):
platform = config["global"]["platform"]

if platform == "s3":
# create singleton boto3 client object & pass in bucket string
from src.utils.singleton_boto import SingletonBoto

boto3_client = SingletonBoto.get_client(config) # noqa
from src.utils import s3_mods as mods

# Creating boto3 client and adding it to the config dict
config["client"] = mods.create_client(config)
elif platform == "network":
# If the platform is "network" or "hdfs", there is no need for a client.
# Adding a client = None for consistency.
Expand Down
2 changes: 0 additions & 2 deletions src/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ def run_pipeline(user_config_path, dev_config_path):
boto3_client = SingletonBoto.get_client(config) # noqa
from src.utils import s3_mods as mods

# Creating boto3 client and adding it to the config dict
# config["client"] = boto3_client
elif platform == "network":
# If the platform is "network" or "hdfs", there is no need for a client.
# Adding a client = None for consistency.
Expand Down
4 changes: 2 additions & 2 deletions src/user_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ export_choices:
export_fte_total_qa: None
export_status_filtered: None
export_frozen_group: None
export_staged_BERD_full_responses: None
export_staged_BERD_full_responses: "2023_staged_BERD_full_responses_24-10-14_v33.csv"
export_staged_NI_full_responses: None
export_full_responses_imputed: None
export_full_estimation_qa: None # "2022_full_estimation_qa_24-07-15_v555.csv"
export_invalid_unrecognised_postcodes: "2022_invalid_unrecognised_postcodes_24-07-04_v503.csv"
export_invalid_unrecognised_postcodes: None # "2022_invalid_unrecognised_postcodes_24-07-04_v503.csv"
Loading

0 comments on commit 79fab5a

Please sign in to comment.