diff --git a/.github/workflows/postgresql-16-src-make.yml b/.github/workflows/postgresql-16-src-make.yml index c943a47d..bf5386ad 100644 --- a/.github/workflows/postgresql-16-src-make.yml +++ b/.github/workflows/postgresql-16-src-make.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@v2 with: repository: 'postgres/postgres' - ref: '93dcdfa88f5f9befb781558920e02d3ee86dc629' + ref: 'a81e5516fa4bc53e332cb35eefe231147c0e1749' path: 'src' - name: Clone postgres-tde-ext repository diff --git a/.github/workflows/postgresql-16-src-meson.yml b/.github/workflows/postgresql-16-src-meson.yml index d5f39d8e..56013586 100644 --- a/.github/workflows/postgresql-16-src-meson.yml +++ b/.github/workflows/postgresql-16-src-meson.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@v2 with: repository: 'postgres/postgres' - ref: '93dcdfa88f5f9befb781558920e02d3ee86dc629' + ref: 'a81e5516fa4bc53e332cb35eefe231147c0e1749' path: 'src' - name: Clone postgres-tde-ext repository diff --git a/README.md b/README.md index da679d71..950996c5 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ This is based on the heap code as of the following commit: ``` -commit 93dcdfa88f5f9befb781558920e02d3ee86dc629 -Author: Tom Lane -Date: Mon Jul 10 12:14:34 2023 -0400 +commit a81e5516fa4bc53e332cb35eefe231147c0e1749 (HEAD -> REL_16_STABLE, origin/REL_16_STABLE) +Author: Amit Kapila +Date: Wed Sep 13 09:48:31 2023 +0530 + + Fix the ALTER SUBSCRIPTION to reflect the change in run_as_owner option. ``` diff --git a/src/access/pg_tde_io.c b/src/access/pg_tde_io.c index b62dad71..b92589dc 100644 --- a/src/access/pg_tde_io.c +++ b/src/access/pg_tde_io.c @@ -287,6 +287,24 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, */ extend_by_pages += extend_by_pages * waitcount; + /* --- + * If we previously extended using the same bistate, it's very likely + * we'll extend some more. Try to extend by as many pages as + * before. 
This can be important for performance for several reasons, + * including: + * + * - It prevents mdzeroextend() switching between extending the + * relation in different ways, which is inefficient for some + * filesystems. + * + * - Contention is often intermittent. Even if we currently don't see + * other waiters (see above), extending by larger amounts can + * prevent future contention. + * --- + */ + if (bistate) + extend_by_pages = Max(extend_by_pages, bistate->already_extended_by); + /* * Can't extend by more than MAX_BUFFERS_TO_EXTEND_BY, we need to pin * them all concurrently. @@ -325,7 +343,7 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, * [auto]vacuum trying to truncate later pages as REL_TRUNCATE_MINIMUM is * way larger. */ - first_block = ExtendBufferedRelBy(EB_REL(relation), MAIN_FORKNUM, + first_block = ExtendBufferedRelBy(BMR_REL(relation), MAIN_FORKNUM, bistate ? bistate->strategy : NULL, EB_LOCK_FIRST, extend_by_pages, @@ -413,6 +431,7 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, /* maintain bistate->current_buf */ IncrBufferRefCount(buffer); bistate->current_buf = buffer; + bistate->already_extended_by += extend_by_pages; } return buffer; diff --git a/src/access/pg_tde_visibilitymap.c b/src/access/pg_tde_visibilitymap.c index 390c7d60..4d25e633 100644 --- a/src/access/pg_tde_visibilitymap.c +++ b/src/access/pg_tde_visibilitymap.c @@ -631,7 +631,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) { Buffer buf; - buf = ExtendBufferedRelTo(EB_REL(rel), VISIBILITYMAP_FORKNUM, NULL, + buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM, NULL, EB_CREATE_FORK_IF_NEEDED | EB_CLEAR_SIZE_CACHE, vm_nblocks, diff --git a/src/access/pg_tdeam.c b/src/access/pg_tdeam.c index aef070a1..537672b6 100644 --- a/src/access/pg_tdeam.c +++ b/src/access/pg_tdeam.c @@ -656,7 +656,7 @@ pg_tde_gettup_advance_block(HeapScanDesc scan, BlockNumber block, ScanDirection if (block == scan->rs_startblock) return InvalidBlockNumber; - 
/* check if the limit imposed by pg_tde_setscanlimits() is met */ + /* check if the limit imposed by heap_setscanlimits() is met */ if (scan->rs_numblocks != InvalidBlockNumber) { if (--scan->rs_numblocks == 0) @@ -1781,6 +1781,7 @@ GetBulkInsertState(void) bistate->current_buf = InvalidBuffer; bistate->next_free = InvalidBlockNumber; bistate->last_free = InvalidBlockNumber; + bistate->already_extended_by = 0; return bistate; } diff --git a/src/include/access/pg_tde_io.h b/src/include/access/pg_tde_io.h index 607c870f..be7aaaff 100644 --- a/src/include/access/pg_tde_io.h +++ b/src/include/access/pg_tde_io.h @@ -32,15 +32,22 @@ typedef struct BulkInsertStateData Buffer current_buf; /* current insertion target page */ /* - * State for bulk extensions. Further pages that were unused at the time - * of the extension. They might be in use by the time we use them though, - * so rechecks are needed. + * State for bulk extensions. + * + * last_free..next_free are further pages that were unused at the time of + * the last extension. They might be in use by the time we use them + * though, so rechecks are needed. * * XXX: Eventually these should probably live in RelationData instead, * alongside targetblock. + * + * already_extended_by is the number of pages that this bulk inserted + * extended by. If we already extended by a significant number of pages, + * we can be more aggressive about extending going forward. 
*/ BlockNumber next_free; BlockNumber last_free; + uint32 already_extended_by; } BulkInsertStateData; diff --git a/src/keyring/keyring_file.c b/src/keyring/keyring_file.c index 1f489d6c..18915cff 100644 --- a/src/keyring/keyring_file.c +++ b/src/keyring/keyring_file.c @@ -50,7 +50,7 @@ int keyringFilePreloadCache(void) return 1; } -int keyringFileStoreKey(const keyInfo*) +int keyringFileStoreKey(const keyInfo* ki) { // First very basic prototype: we just dump the cache to disk FILE* f = fopen(keyringFileDataFileName, "w"); diff --git a/tools/heap_merge.sh b/tools/heap_merge.sh new file mode 100644 index 00000000..a07afaef --- /dev/null +++ b/tools/heap_merge.sh @@ -0,0 +1,409 @@ +#!/bin/bash + +# SCRIPT: heap_merge.sh +#----------------------------- +# This script generates patches between two PG commits and applies them to +# the TDE extension source. + +set -o pipefail + +## GLOBAL VARIABLES +export TDE="tde" +export SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +export WORKING_DIR="${WORKING_DIR:-$(mktemp -d -t $TDE)}" +export TDE_DIR="${WORKING_DIR}/tde" +export USER_TDE_DIR="" +export PG_COMMIT_BASE="${PG_COMMIT_BASE}" +export PG_COMMIT_LATEST="${PG_COMMIT_LATEST}" +export TDE_COMMIT="${TDE_COMMIT}" +export FILES_BASE_DIR="pg_base" +export FILES_LATEST_DIR="pg_latest" +export FILES_PATCH_DIR="pg_patches" +export TDE_DRY_RUN="--dry-run" +export APPLY_PATCHES_FORCE=0 + +# Script variables +total_patches=0 +total_patches_failed=0 + +declare -a patch_list_unclean=() + +declare -a pg_header_file_map=("visibilitymap.h" "rewriteheap.h" "heapam_xlog.h" "hio.h" "heapam.h" "heaptoast.h") +declare -a tde_header_file_map=("pg_tde_visibilitymap.h" "pg_tde_rewrite.h" "pg_tdeam_xlog.h" "pg_tde_io.h" "pg_tdeam.h" "pg_tdetoast.h") +declare -a pg_c_file_map=("heapam.c" "heapam_handler.c" "heapam_visibility.c" "heaptoast.c" "hio.c" "pruneheap.c" "rewriteheap.c" "vacuumlazy.c" "visibilitymap.c") +declare -a tde_c_file_map=("pg_tdeam.c" 
"pg_tdeam_handler.c" "pg_tdeam_visibility.c" "pg_tdetoast.c" "pg_tde_io.c" "pg_tde_prune.c" "pg_tde_rewrite.c" "pg_tde_vacuumlazy.c" "pg_tde_visibilitymap.c") + + +## USAGE +usage() +{ + errorCode=${1:-0} + + cat << EOF + +usage: $0 OPTIONS + +This script generates file-wise patches between two PG commits and applies it to +the TDE extension source. + +By default, it only performs a dry run of the patch application. See the usage +options below for applying clean patches or forcefully applying all patches. + +It clones both PG and TDE repositories in the working directory. If TDE path is +specified either with its usage option or via the environment variable, then +the script will use the given TDE source code. + +* All working folders folders created will carry "$TDE" as part of the folder name. +* This simplies the manual cleanup process. + +OPTIONS can be: + + -h Show this message + + -a The patches are not applied by default. Specify this to + apply the generated patches. Otherwise, the script will + only perform a dryrun. + + -f Force apply patches. + + -b [PG_COMMIT_BASE] PG base commit hash/branch/tag for patch [REQUIRED] + -l [PG_COMMIT_LATEST] PG lastest commit hash/branch/tag for patch [REQUIRED] + -x [TDE_COMMIT] TDE commit hash/branch/tag to apply patch on [REQUIRED] + + -t [USER_TDE_DIR] Source directory for TDE [Default: Cloned under WORKING_DIR] + -w [WORKING_DIR] Script working folder [Default: $WORKING_DIR] + * a folder where patches and relevant log + files may be created. This folder will not be removed + by the script, so better to keep it in the temp folder. 
+ +EOF + + if [[ $errorCode -ne 0 ]]; + then + exit_script $errorCode + fi +} + +# Perform any required cleanup and exit with the given error/success code +exit_script() +{ + # Reminder of manual cleanup + if [[ -d $WORKING_DIR ]]; + then + printf "\n%20s\n" | tr " " "-" + printf "The following folder was created by the script and may require manual removal.\n" + printf "* %s\n" $WORKING_DIR + printf "%20s\n" | tr " " "-" + fi + + # Exit with a given return code or 0 if none are provided. + exit ${1:-0} +} + +# Raise the error for a failure to checkout required source +checkout_validate() +{ + commit=$1 + retval=$2 + + if [[ $retval -ne 0 ]]; + then + printf "%s is not a valid commit hash/branch/tag.\n" $commit + exit_script $retval + fi +} + +# Validate arguments to ensure that we can safely run the script +validate_args() +{ + local USAGE_TEXT="See usage for details." + local PATH_ERROR_TEXT="path is not a valid directory." + + if [[ ! -z "$USER_TDE_DIR" ]]; + then + if [[ ! -d "$USER_TDE_DIR" ]]; + then + printf "TDE %s %s\n" $PATH_ERROR_TEXT $USAGE_TEXT >&2 + usage 1 + fi + elif [[ -z "$TDE_COMMIT" ]]; + then + printf "TDE_COMMIT is not specified. %s\n" $USAGE_TEXT >&2 + usage 1 + fi + + + if [[ ! -d "$WORKING_DIR" ]]; + then + printf "Working folder %s %s\n" $PATH_ERROR_TEXT $USAGE_TEXT >&2 + usage 1 + fi + + if [[ -z "$PG_COMMIT_BASE" ]]; + then + printf "PG_COMMIT_BASE is not specified. %s\n" $USAGE_TEXT >&2 + usage 1 + fi + + if [[ -z "$PG_COMMIT_LATEST" ]]; + then + printf "PG_COMMIT_LATEST is not specified. 
%s\n" $USAGE_TEXT >&2 + usage 1 + fi +} + +# Print the file mapping between PG and TDE +print_map() +{ + printf "\n" + printf "%50s\n" | tr " " "=" + printf "%s\n" "Heap Access to TDE File Map" + printf "%50s\n\n" | tr " " "=" + + printf "%s\n" "--- Header Files ---" + for (( i=0; i < ${#pg_header_file_map[@]}; i++ )); + do + printf "* %-20s --> %s\n" ${pg_header_file_map[$i]} ${tde_header_file_map[$i]} + done + + printf "\n" + printf "%s\n" "--- C Files ---" + for (( i=0; i < ${#pg_c_file_map[@]}; i++ )); + do + printf "* %-20s --> %s\n" ${pg_c_file_map[$i]} ${tde_c_file_map[$i]} + done + + printf "\n\n" +} + +# Copy files from the PG source to the a separate folder. +# This function expects that we don't have duplicate file names. +copy_files() +{ + local dest_folder=$1 + shift + local file_list=("$@") + retval=0 + + for f in "${file_list[@]}"; + do + find * -name $f -exec cp -rpv {} $dest_folder \; + retval=$? + + if [[ $retval -ne 0 ]]; + then + exit_script $retval + fi + done +} + +# Compare two files and generate a patch +generate_file_patch() +{ + f_base=$1 + f_latest=$2 + f_patch=$3 + + diff -u $f_base $f_latest > $f_patch + + if [[ ! -s $f_patch ]]; + then + rm -fv $f_patch + else + total_patches=$(expr $total_patches + 1) + fi +} + +# Apply a given patch on a given file +apply_file_patch() +{ + local file_to_patch=$1 + local patch_file=$2 + local apply_patch=${APPLY_PATCHES_FORCE} + + echo "===> $APPLY_PATCHES_FORCE ==> $apply_patch" + + if [[ -f $patch_file ]]; + then + find * -name $file_to_patch | xargs -I{} echo "patch -p1 -t --dry-run {} $patch_file" | sh + + if [[ $? 
-ne 0 ]]; + then + total_patches_failed=$(expr $total_patches_failed + 1) + patch_list_unclean+=($(basename $patch_file)) + patch_list_unclean+=($(basename $file_to_patch)) + elif [[ -z "$TDE_DRY_RUN" ]]; + then + apply_patch=1 + fi + + echo "ABOUT TO APPLY PATCH" + + if [[ $apply_patch -eq 1 ]]; + then + echo "APPLYING PATCH" + find * -name $file_to_patch | xargs -I{} echo "patch -p1 -t {} $patch_file" | sh + fi + fi +} + +# Generate file-wise patches between the two PG commits +generate_pg_patches() +{ + retval=0 + + mkdir $FILES_BASE_DIR + mkdir $FILES_LATEST_DIR + mkdir $FILES_PATCH_DIR + + git clone https://github.com/postgres/postgres.git + + # go into the postgres directory + pushd postgres + + # safety net to ensure that any changes introduced due to git configuration are cleaned up + git checkout . + + #checkout base source code + git checkout $PG_COMMIT_BASE + checkout_validate $PG_COMMIT_BASE $? + copy_files "$WORKING_DIR/$FILES_BASE_DIR" "${pg_header_file_map[@]}" + copy_files "$WORKING_DIR/$FILES_BASE_DIR" "${pg_c_file_map[@]}" + + # safety net to ensure that any changes introduced due to git configuration are cleaned up + git checkout . + + # do the latest checkout + git checkout $PG_COMMIT_LATEST + checkout_validate $PG_COMMIT_LATEST $? + copy_files "$WORKING_DIR/$FILES_LATEST_DIR" "${pg_header_file_map[@]}" + copy_files "$WORKING_DIR/$FILES_LATEST_DIR" "${pg_c_file_map[@]}" + + # go back to the old directory + popd + + # generate patches for the header files + for f in "${pg_header_file_map[@]}"; + do + generate_file_patch "$FILES_BASE_DIR/$f" "$FILES_LATEST_DIR/$f" "$FILES_PATCH_DIR/$f.patch" + done + + # generate patches for the c files + for f in "${pg_c_file_map[@]}"; + do + generate_file_patch "$FILES_BASE_DIR/$f" "$FILES_LATEST_DIR/$f" "$FILES_PATCH_DIR/$f.patch" + done +} + +# Apply patches to the TDE sources +tde_apply_patches() +{ + # check if the $TDE folder exists. If not, then we have to clone it + if [[ ! 
-d "$TDE_DIR" ]]; + then + t="$(basename $TDE_DIR)" + git clone https://github.com/Percona-Lab/postgres-tde-ext.git $t + fi + + pushd $TDE_DIR + + # do the required checkout + git checkout $TDE_COMMIT + checkout_validate $TDE_COMMIT $? + + # apply patches to the header files + for (( i=0; i < ${#pg_header_file_map[@]}; i++ )); + do + patch_file=$WORKING_DIR/$FILES_PATCH_DIR/${pg_header_file_map[$i]}.patch + apply_file_patch ${tde_header_file_map[$i]} $patch_file + done + + # apply patches to the header files + for (( i=0; i < ${#pg_c_file_map[@]}; i++ )); + do + patch_file=$WORKING_DIR/$FILES_PATCH_DIR/${pg_c_file_map[$i]}.patch + apply_file_patch ${tde_c_file_map[$i]} $patch_file + done +} + +# Check options passed in. +while getopts "haf t:b:l:w:x:" OPTION +do + case $OPTION in + h) + usage + exit_script 1 + ;; + + a) + TDE_DRY_RUN="" + ;; + + f) + APPLY_PATCHES_FORCE=1 + ;; + b) + PG_COMMIT_BASE=$OPTARG + ;; + l) + PG_COMMIT_LATEST=$OPTARG + ;; + t) + TDE_DIR=$OPTARG + ;; + w) + WORK_DIR=$OPTARG + ;; + x) + TDE_COMMIT=$OPTARG + ;; + + ?) + usage + exit_script + ;; + esac +done + +# Validate and update setup +validate_args + +# print the file map +print_map + +# Let's move to the working directory +pushd $WORKING_DIR + +# generate pg patches between the two commits +generate_pg_patches + +# apply patches +tde_apply_patches + +# We're done... +printf "\nJob completed!\n" + +printf "\n\n" +printf "%50s\n" | tr " " "=" +printf "RESULT SUMMARY\n" +printf "%50s\n" | tr " " "=" +printf "Patches Generated = %s\n" $total_patches +printf "Patches Applied = %s\n" $(expr $total_patches - $total_patches_failed) +printf "Patches Failed = %s\n" $total_patches_failed + +if [[ ${#patch_list_unclean[@]} -gt 0 ]]; +then + printf "=> Failed Patch List\n" +fi + +for (( i=0; i < ${#patch_list_unclean[@]}; i++ )); +do + printf "* %s --> %s\n" ${patch_list_unclean[$i]} ${patch_list_unclean[$(expr $i + 1)]} + i=$(expr $i + 1) +done + +# Perform clean up and exit. +exit_script 0