From e1aed1d25f1eb31c9ed4ec2ea56cdc7988c45174 Mon Sep 17 00:00:00 2001
From: addyGarg <76091894+addyGarg@users.noreply.github.com>
Date: Thu, 30 Sep 2021 11:57:35 +0530
Subject: [PATCH 1/2] Add get-credentials and import script for sf

---
 .../ceph/get-credentials-sf.sh                |  35 ++++
 .../backup_and_restore/ceph/import-sf.sh      | 164 ++++++++++++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh
 create mode 100644 platform/onebox/backup_and_restore/ceph/import-sf.sh

diff --git a/platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh b/platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh
new file mode 100644
index 00000000..76c8b707
--- /dev/null
+++ b/platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+: '
+This script will generate a json file with creds to be used with import/export
+Script will generate file storage-creds.json
+Run it from the VM running aifabric.
+Use it as it is [insecure] or transfer it to some credsManager and then change the backup/restore scripts to fetch from the credsManager instead of the json file. # $1 - [Optional but recommended] pass the private ip of the aif machine on which it is accessible from other vms in the same network
+[Script Version -> 21.4]
+'
+
+readonly PRIVATE_IP=$1
+
+function initialize_variables() {
+  if [ -z "$PRIVATE_IP" ]; then
+    OBJECT_GATEWAY_EXTERNAL_HOST=$(kubectl -n istio-system get vs cephobjectstore-vs -o json | jq '.spec.hosts[0]' | tr -d '"')
+  else
+    OBJECT_GATEWAY_EXTERNAL_HOST=$PRIVATE_IP
+  fi
+  echo "$green $(date) Private IP was $PRIVATE_IP and OBJECT_GATEWAY_EXTERNAL_HOST is $OBJECT_GATEWAY_EXTERNAL_HOST"
+
+  STORAGE_ACCESS_KEY=$(kubectl -n uipath get secret deployment-storage-credentials -o json | jq '.data.".dockerconfigjson"' | sed -e 's/^"//' -e 's/"$//' | base64 -d | jq '.access_key' | sed -e 's/^"//' -e 's/"$//')
+  STORAGE_SECRET_KEY=$(kubectl -n uipath get secret deployment-storage-credentials -o json | jq '.data.".dockerconfigjson"' | sed -e 's/^"//' -e 's/"$//' | base64 -d | jq '.secret_key' | sed -e 's/^"//' -e 's/"$//')
+
+  readonly AWS_HOST=$OBJECT_GATEWAY_EXTERNAL_HOST
+  readonly AWS_ENDPOINT="https://${OBJECT_GATEWAY_EXTERNAL_HOST}"
+  readonly AWS_ACCESS_KEY_ID=$STORAGE_ACCESS_KEY
+  readonly AWS_SECRET_ACCESS_KEY=$STORAGE_SECRET_KEY
+}
+
+function generate_json() {
+  echo '{"AWS_HOST": "'$AWS_HOST'", "AWS_ENDPOINT": "'$AWS_ENDPOINT'", "AWS_ACCESS_KEY_ID": "'$AWS_ACCESS_KEY_ID'", "AWS_SECRET_ACCESS_KEY": "'$AWS_SECRET_ACCESS_KEY'"}' > storage-creds.json
+}
+
+initialize_variables
+generate_json
\ No newline at end of file
diff --git a/platform/onebox/backup_and_restore/ceph/import-sf.sh b/platform/onebox/backup_and_restore/ceph/import-sf.sh
new file mode 100644
index 00000000..1839750a
--- /dev/null
+++ b/platform/onebox/backup_and_restore/ceph/import-sf.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+: '
+This script will import all data stored at a path to blob storage in target environments.
+# $1 - json file with credentials, change the script to work with your own credential manager
+# $2 - path to import from
+Script will look for folders like path/ceph/bucket1, path/ceph/bucket2, each containing data from one bucket, and will create each bucket and upload its data
+[Script Version -> 21.4]
+'
+
+red=$(tput setaf 1)
+green=$(tput setaf 2)
+yellow=$(tput setaf 3)
+default=$(tput sgr0)
+
+echo "$green $(date) Starting sync of object storage to local disk $default"
+
+readonly CREDENTIALS_FILE=$1
+readonly BASE_PATH=$2
+readonly SOURCE_TENANT_ID=$3
+readonly TARGET_TENANT_ID=$4
+readonly BUCKET_NAME_INPUT=$5
+
+# Validate that the file provided by the user exists. It may be a relative or an absolute path
+# $1 - File path
+function validate_file_path() {
+  if [ ! -f "$1" ]; then
+    echo "$red $(date) $1 file does not exist, Please check ... Exiting $default"
+    exit 1
+  fi
+}
+
+function initialize_variables() {
+  # Validate file path
+  validate_file_path $CREDENTIALS_FILE
+
+  export AWS_HOST=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_HOST != null) | .AWS_HOST')
+  export AWS_ENDPOINT=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_ENDPOINT != null) | .AWS_ENDPOINT')
+  export AWS_ACCESS_KEY_ID=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_ACCESS_KEY_ID != null) | .AWS_ACCESS_KEY_ID')
+  export AWS_SECRET_ACCESS_KEY=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_SECRET_ACCESS_KEY != null) | .AWS_SECRET_ACCESS_KEY')
+  readonly DATA_FOLDER_NAME="ceph"
+  readonly DATA_FOLDER_PATH=${BASE_PATH}/${DATA_FOLDER_NAME}/
+
+}
+
+function upload_blob() {
+  BUCKET_NAME=${1}
+  DIR_NAME=${2}
+  # create bucket if not exists
+  echo "Inside upload_blob"
+  local check_bucket=$(s3cmd -v info --host=${AWS_ENDPOINT} --host-bucket= s3://${BUCKET_NAME} --no-check-certificate -q)
+  echo "Inside upload_blob 2"
+  if [ -z "$check_bucket" ]; then
+    echo "$green $(date) Creating bucket ${BUCKET_NAME} $default"
+    s3cmd mb --host=${AWS_ENDPOINT} --host-bucket= s3://${BUCKET_NAME} --no-check-certificate
+  else
+    echo "$yellow $(date) Bucket exists: ${BUCKET_NAME}, skipping $default"
+  fi
+
+  # create folder if not exists
+
+  # sync folder to bucket
+  echo "$green $(date) Starting sync of object storage to local disk for bucket ${BUCKET_NAME} $default"
+
+  ## Show failure, if training-tenantId bucket is not created already on S3.
+
+  aws s3 --endpoint-url ${AWS_ENDPOINT} --no-verify-ssl --only-show-errors sync ${DATA_FOLDER_PATH}${DIR_NAME} s3://${BUCKET_NAME}/${DIR_NAME}
+  echo "$green $(date) Finished sync of object storage to local disk for bucket ${BUCKET_NAME} $default"
+}
+
+function update_cors_policy() {
+  BUCKET_NAME=${1}
+  DIR_NAME=${2}
+  if [ ! -f "${DATA_FOLDER_PATH}${DIR_NAME}-cors.json" ]; then
+    echo "$red $(date) ${DATA_FOLDER_PATH}${DIR_NAME}-cors.json file does not exist, Please check ... Skipping cors creation $default"
+    return
+  fi
+  aws --endpoint-url $AWS_ENDPOINT --no-verify-ssl s3api put-bucket-cors --bucket ${BUCKET_NAME} --cors-configuration file://${DATA_FOLDER_PATH}${DIR_NAME}-cors.json
+}
+
+function _contains () { # Check if space-separated list $1 contains line $2
+  echo "$1" | tr ' ' '\n' | grep -F -x -q "$2"
+}
+
+function remove_unwanted_data_from_source_directory() {
+
+  SOURCE_DIRECTORY=$1
+  cd $DATA_FOLDER_PATH
+
+  DIRS=$(find . -maxdepth 1 -mindepth 1 -type d -printf '%f\n')
+  echo "DIRS is $DIRS"
+  echo "SOURCE_DIRECTORY is $SOURCE_DIRECTORY"
+
+  ## Check if source tenant directory is present in the storage or not.
+  if _contains "${DIRS}" "${SOURCE_DIRECTORY}"; then
+    echo "in list"
+  else
+    echo "$SOURCE_DIRECTORY not present in the storage."
+    exit 1
+  fi
+
+  cd $DATA_FOLDER_PATH/$SOURCE_DIRECTORY
+
+  ## Remove all unwanted directies from source folder.
+  data=$(find . -maxdepth 1 -mindepth 1 -printf '%f\n')
+  while read folder; do
+    if [[ $folder =~ ^\{?[A-F0-9a-f]{8}-[A-F0-9a-f]{4}-[A-F0-9a-f]{4}-[A-F0-9a-f]{4}-[A-F0-9a-f]{12}\}?$ ]]; then
+      echo "Pass $folder"
+      continue;
+    else
+      echo "Deleting $folder"
+      sudo rm -rf $folder
+    fi
+  done <<<"$data"
+  cd $DATA_FOLDER_PATH
+}
+
+function change_source_to_target_tenant_id() {
+  SOURCE_DIRECTORY=$1
+  TARGET_DIRECTORY=$2
+  cd $DATA_FOLDER_PATH
+  sudo mv $SOURCE_DIRECTORY $TARGET_DIRECTORY
+}
+
+function process_buckets() {
+
+  cd $BASE_PATH
+  SOURCE_DIRECTORY="training-"$SOURCE_TENANT_ID
+  TARGET_DIRECTORY="training-"$TARGET_TENANT_ID
+  remove_unwanted_data_from_source_directory $SOURCE_DIRECTORY
+  change_source_to_target_tenant_id $SOURCE_DIRECTORY $TARGET_DIRECTORY
+  upload_blob ${BUCKET_NAME_INPUT} ${TARGET_DIRECTORY}
+# update_cors_policy ${BUCKET_NAME_INPUT} ${TARGET_DIRECTORY}
+}
+
+# Validate dependency module
+# $1 - Name of the dependency module
+# $2 - Command to validate module
+function validate_dependency() {
+  eval $2
+  # Next statement checks the last command's exit status; aws --version has some issue
+  if [ $? -ne 0 ]; then
+    echo "$red $(date) Please install ******** $1 *********** ... Exiting $default"
+    exit 1
+  fi
+}
+
+# Validate required modules exist in target setup
+function validate_setup() {
+  validate_dependency "aws s3" "aws --version"
+  validate_dependency s3cmd "s3cmd --version"
+  echo "$(date) Successfully validated required dependencies"
+}
+
+# Validate Setup
+validate_setup
+
+# Update ENV Variables
+initialize_variables
+
+# Process data inside buckets
+
+## Take a map input containing source and target tenant Id's.
+process_buckets
\ No newline at end of file

From 59027f63da7d8e4d4d70d7c85197acce5bb22342 Mon Sep 17 00:00:00 2001
From: addyGarg <76091894+addyGarg@users.noreply.github.com>
Date: Tue, 5 Oct 2021 08:45:15 +0530
Subject: [PATCH 2/2] chng

---
 .../ceph/get-credentials-sf.sh                | 13 +++---
 .../backup_and_restore/ceph/import-sf.sh      | 45 ++++++++++++-------
 .../onebox/backup_and_restore/ceph/import.sh  |  4 ++
 3 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh b/platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh
index 76c8b707..10d5cde5 100644
--- a/platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh
+++ b/platform/onebox/backup_and_restore/ceph/get-credentials-sf.sh
@@ -2,21 +2,22 @@
 
 : '
 This script will generate a json file with creds to be used with import/export
-Script will generate file storage-creds.json
+Script will generate file storage-creds-sf.json
 Run it from the VM running aifabric.
 Use it as it is [insecure] or transfer it to some credsManager and then change the backup/restore scripts to fetch from the credsManager instead of the json file. # $1 - [Optional but recommended] pass the private ip of the aif machine on which it is accessible from other vms in the same network
 [Script Version -> 21.4]
 '
 
-readonly PRIVATE_IP=$1
+readonly PUBLIC_IP=$1
 
 function initialize_variables() {
-  if [ -z "$PRIVATE_IP" ]; then
+  if [ -z "$PUBLIC_IP" ]; then
+    ## If public ip is not given as an argument.
     OBJECT_GATEWAY_EXTERNAL_HOST=$(kubectl -n istio-system get vs cephobjectstore-vs -o json | jq '.spec.hosts[0]' | tr -d '"')
   else
-    OBJECT_GATEWAY_EXTERNAL_HOST=$PRIVATE_IP
+    OBJECT_GATEWAY_EXTERNAL_HOST=$PUBLIC_IP
   fi
-  echo "$green $(date) Private IP was $PRIVATE_IP and OBJECT_GATEWAY_EXTERNAL_HOST is $OBJECT_GATEWAY_EXTERNAL_HOST"
+  echo "$green $(date) Public IP was $PUBLIC_IP and OBJECT_GATEWAY_EXTERNAL_HOST is $OBJECT_GATEWAY_EXTERNAL_HOST"
 
   STORAGE_ACCESS_KEY=$(kubectl -n uipath get secret deployment-storage-credentials -o json | jq '.data.".dockerconfigjson"' | sed -e 's/^"//' -e 's/"$//' | base64 -d | jq '.access_key' | sed -e 's/^"//' -e 's/"$//')
   STORAGE_SECRET_KEY=$(kubectl -n uipath get secret deployment-storage-credentials -o json | jq '.data.".dockerconfigjson"' | sed -e 's/^"//' -e 's/"$//' | base64 -d | jq '.secret_key' | sed -e 's/^"//' -e 's/"$//')
@@ -28,7 +29,7 @@ function initialize_variables() {
 }
 
 function generate_json() {
-  echo '{"AWS_HOST": "'$AWS_HOST'", "AWS_ENDPOINT": "'$AWS_ENDPOINT'", "AWS_ACCESS_KEY_ID": "'$AWS_ACCESS_KEY_ID'", "AWS_SECRET_ACCESS_KEY": "'$AWS_SECRET_ACCESS_KEY'"}' > storage-creds.json
+  echo '{"AWS_HOST": "'$AWS_HOST'", "AWS_ENDPOINT": "'$AWS_ENDPOINT'", "AWS_ACCESS_KEY_ID": "'$AWS_ACCESS_KEY_ID'", "AWS_SECRET_ACCESS_KEY": "'$AWS_SECRET_ACCESS_KEY'"}' > storage-creds-sf.json
 }
 
 initialize_variables
diff --git a/platform/onebox/backup_and_restore/ceph/import-sf.sh b/platform/onebox/backup_and_restore/ceph/import-sf.sh
index 1839750a..8247a3e1 100644
--- a/platform/onebox/backup_and_restore/ceph/import-sf.sh
+++ b/platform/onebox/backup_and_restore/ceph/import-sf.sh
@@ -3,7 +3,10 @@
 : '
 This script will import all data stored at a path to blob storage in target environments.
 # $1 - json file with credentials, change the script to work with your own credential manager
-# $2 - path to import from
+# $2 - Absolute path to import from
+# $3 - Source tenant id.
+# $4 - Target tenant id.
+# $5 - Bucket name.
 Script will look for folders like path/ceph/bucket1, path/ceph/bucket2, each containing data from one bucket, and will create each bucket and upload its data
 [Script Version -> 21.4]
 '
@@ -40,16 +43,14 @@ function initialize_variables() {
   export AWS_SECRET_ACCESS_KEY=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_SECRET_ACCESS_KEY != null) | .AWS_SECRET_ACCESS_KEY')
   readonly DATA_FOLDER_NAME="ceph"
   readonly DATA_FOLDER_PATH=${BASE_PATH}/${DATA_FOLDER_NAME}/
-
 }
 
 function upload_blob() {
   BUCKET_NAME=${1}
   DIR_NAME=${2}
+  TARGET_DIR_NAME=${3}
   # create bucket if not exists
-  echo "Inside upload_blob"
-  local check_bucket=$(s3cmd -v info --host=${AWS_ENDPOINT} --host-bucket= s3://${BUCKET_NAME} --no-check-certificate -q)
-  echo "Inside upload_blob 2"
+  local check_bucket=$(s3cmd info --host=${AWS_ENDPOINT} --host-bucket= s3://${BUCKET_NAME} --no-check-certificate -q)
   if [ -z "$check_bucket" ]; then
     echo "$green $(date) Creating bucket ${BUCKET_NAME} $default"
     s3cmd mb --host=${AWS_ENDPOINT} --host-bucket= s3://${BUCKET_NAME} --no-check-certificate
@@ -64,7 +65,7 @@ function upload_blob() {
 
   ## Show failure, if training-tenantId bucket is not created already on S3.
 
-  aws s3 --endpoint-url ${AWS_ENDPOINT} --no-verify-ssl --only-show-errors sync ${DATA_FOLDER_PATH}${DIR_NAME} s3://${BUCKET_NAME}/${DIR_NAME}
+  aws s3 --endpoint-url ${AWS_ENDPOINT} --no-verify-ssl --only-show-errors sync ${DATA_FOLDER_PATH}${DIR_NAME} s3://${BUCKET_NAME}/${TARGET_DIR_NAME}
   echo "$green $(date) Finished sync of object storage to local disk for bucket ${BUCKET_NAME} $default"
 }
 
@@ -78,33 +79,34 @@ function update_cors_policy() {
   aws --endpoint-url $AWS_ENDPOINT --no-verify-ssl s3api put-bucket-cors --bucket ${BUCKET_NAME} --cors-configuration file://${DATA_FOLDER_PATH}${DIR_NAME}-cors.json
 }
 
-function _contains () { # Check if space-separated list $1 contains line $2
+function _contains() { # Check if space-separated list $1 contains item $2
   echo "$1" | tr ' ' '\n' | grep -F -x -q "$2"
 }
 
 function remove_unwanted_data_from_source_directory() {
 
   SOURCE_DIRECTORY=$1
+  echo "$green $(date) Removing unwanted data from source directory $SOURCE_DIRECTORY $default"
   cd $DATA_FOLDER_PATH
 
+  ## Fetch list of directories available in the ceph folder.
   DIRS=$(find . -maxdepth 1 -mindepth 1 -type d -printf '%f\n')
-  echo "DIRS is $DIRS"
-  echo "SOURCE_DIRECTORY is $SOURCE_DIRECTORY"
 
   ## Check if source tenant directory is present in the storage or not.
   if _contains "${DIRS}" "${SOURCE_DIRECTORY}"; then
-    echo "in list"
+    echo "$green $SOURCE_DIRECTORY present in the storage $default."
   else
-    echo "$SOURCE_DIRECTORY not present in the storage."
+    echo "$red $SOURCE_DIRECTORY not present in the storage $default."
     exit 1
   fi
 
   cd $DATA_FOLDER_PATH/$SOURCE_DIRECTORY
 
-  ## Remove all unwanted directies from source folder.
+  ## Remove all unwanted directories from source folder.
   data=$(find . -maxdepth 1 -mindepth 1 -printf '%f\n')
   while read folder; do
     if [[ $folder =~ ^\{?[A-F0-9a-f]{8}-[A-F0-9a-f]{4}-[A-F0-9a-f]{4}-[A-F0-9a-f]{4}-[A-F0-9a-f]{12}\}?$ ]]; then
+      ## Only folder names which are in the form of guid are allowed, as they represent projectIds.
       echo "Pass $folder"
       continue;
     else
@@ -112,12 +114,15 @@ function remove_unwanted_data_from_source_directory() {
       sudo rm -rf $folder
     fi
   done <<<"$data"
+
   cd $DATA_FOLDER_PATH
 }
 
 function change_source_to_target_tenant_id() {
   SOURCE_DIRECTORY=$1
   TARGET_DIRECTORY=$2
+
+  echo "$green $(date) Changing source tenantId $SOURCE_DIRECTORY to target tenantId $TARGET_DIRECTORY $default"
   cd $DATA_FOLDER_PATH
   sudo mv $SOURCE_DIRECTORY $TARGET_DIRECTORY
 }
@@ -128,11 +133,19 @@ function process_buckets() {
   SOURCE_DIRECTORY="training-"$SOURCE_TENANT_ID
   TARGET_DIRECTORY="training-"$TARGET_TENANT_ID
   remove_unwanted_data_from_source_directory $SOURCE_DIRECTORY
-  change_source_to_target_tenant_id $SOURCE_DIRECTORY $TARGET_DIRECTORY
-  upload_blob ${BUCKET_NAME_INPUT} ${TARGET_DIRECTORY}
+# change_source_to_target_tenant_id $SOURCE_DIRECTORY $TARGET_DIRECTORY
+  upload_blob ${BUCKET_NAME_INPUT} ${SOURCE_DIRECTORY} ${TARGET_DIRECTORY}
 # update_cors_policy ${BUCKET_NAME_INPUT} ${TARGET_DIRECTORY}
 }
 
+function process_ml_model_files() {
+
+  echo "Inside process_ml_model_files $DATA_FOLDER_PATH"
+  cd $BASE_PATH
+  aws s3 --endpoint-url ${AWS_ENDPOINT} --no-verify-ssl --only-show-errors sync ${DATA_FOLDER_PATH}/ml-model-files/ s3://ml-model-files/
+  cd -
+}
+
 # Validate dependency module
 # $1 - Name of the dependency module
 # $2 - Command to validate module
@@ -159,6 +172,6 @@ validate_setup
 initialize_variables
 
 # Process data inside buckets
+process_buckets
 
-## Take a map input containing source and target tenant Id's.
-process_buckets
\ No newline at end of file
+process_ml_model_files
\ No newline at end of file
diff --git a/platform/onebox/backup_and_restore/ceph/import.sh b/platform/onebox/backup_and_restore/ceph/import.sh
index 8d547aa6..8e1d0183 100644
--- a/platform/onebox/backup_and_restore/ceph/import.sh
+++ b/platform/onebox/backup_and_restore/ceph/import.sh
@@ -35,6 +35,10 @@ function initialize_variables() {
   export AWS_ENDPOINT=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_ENDPOINT != null) | .AWS_ENDPOINT')
   export AWS_ACCESS_KEY_ID=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_ACCESS_KEY_ID != null) | .AWS_ACCESS_KEY_ID')
   export AWS_SECRET_ACCESS_KEY=$(cat $CREDENTIALS_FILE | jq -r 'select(.AWS_SECRET_ACCESS_KEY != null) | .AWS_SECRET_ACCESS_KEY')
+  echo "AWS_HOST is $AWS_HOST"
+  echo "AWS_ENDPOINT is $AWS_ENDPOINT"
+  echo "AWS_ACCESS_KEY_ID is $AWS_ACCESS_KEY_ID"
+  echo "AWS_SECRET_ACCESS_KEY is $AWS_SECRET_ACCESS_KEY"
   readonly FOLDER=${BASE_PATH}/ceph/
 }
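
Usage sketch for the two new scripts, assuming the patches above are applied; the IP address, backup path, tenant ids and bucket name below are hypothetical placeholders, not values taken from the patches:

  # 1. On the VM running aifabric, generate the credentials file
  #    (after PATCH 2/2 this is written as storage-creds-sf.json)
  ./get-credentials-sf.sh 10.0.0.12

  # 2. Import a backup laid out as <path>/ceph/training-<sourceTenantId>/<projectId GUID>/...
  #    Arguments: creds file, absolute backup path, source tenant id, target tenant id, bucket name
  ./import-sf.sh storage-creds-sf.json /backup \
      11111111-1111-1111-1111-111111111111 \
      22222222-2222-2222-2222-222222222222 \
      train-data

import-sf.sh deletes every entry under the source tenant folder whose name is not a GUID (only projectId folders are kept) and then syncs the data into the given bucket, so run it against a copy of the backup rather than the only copy.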