From 817b371161e324bcaeec266175e3b1fbc03c4bc1 Mon Sep 17 00:00:00 2001 From: Kristian Lejao <1741885+lejaokri@users.noreply.github.com> Date: Wed, 18 Oct 2023 08:51:24 -0700 Subject: [PATCH] Another batch of memory management improvement (#1920) Some codepaths access arrays out of bounds or reference uninitialize variables. do_compile() prepare_format_string() pre_transform_target_entry() bbf_ProcessUtility() babelfish_add_domain_mapping_entry_internal() object_name() TdsSendTypeBinary() modify_insert_stmt() Tasks: BABEL-4446, BABEL-4457, BABEL-4456, BABEL-4471, BABEL-4472, BABEL-4483 Signed-off-by: Kristian Lejao --- .../src/backend/tds/tdstypeio.c | 88 +++++++++++-------- contrib/babelfishpg_tsql/runtime/functions.c | 32 ++++--- contrib/babelfishpg_tsql/src/hooks.c | 21 +++-- contrib/babelfishpg_tsql/src/pl_comp.c | 11 ++- contrib/babelfishpg_tsql/src/pl_handler.c | 2 +- contrib/babelfishpg_tsql/src/rolecmds.c | 2 - contrib/babelfishpg_tsql/src/string.c | 3 +- 7 files changed, 91 insertions(+), 68 deletions(-) diff --git a/contrib/babelfishpg_tds/src/backend/tds/tdstypeio.c b/contrib/babelfishpg_tds/src/backend/tds/tdstypeio.c index c801979bc2..38191c77fa 100644 --- a/contrib/babelfishpg_tds/src/backend/tds/tdstypeio.c +++ b/contrib/babelfishpg_tds/src/backend/tds/tdstypeio.c @@ -153,10 +153,10 @@ typedef struct FunctionCacheByTdsIdEntry TdsIoFunctionData data; } FunctionCacheByTdsIdEntry; -/* +/* * This is a modified copy of a function from POSTGIS to get SRID from GSERIALIZED struct */ -static int32_t +static int32_t get_srid(uint8_t *id) { int32_t srid = 0; @@ -242,9 +242,9 @@ getSendFunc(int funcId) case TDS_SEND_DATETIMEOFFSET: return TdsSendTypeDatetimeoffset; case TDS_SEND_GEOMETRY: - return TdsSendTypeGeometry; - case TDS_SEND_GEOGRAPHY: - return TdsSendTypeGeography; + return TdsSendTypeGeometry; + case TDS_SEND_GEOGRAPHY: + return TdsSendTypeGeography; /* TODO: should Assert here once all types are implemented */ default: return NULL; @@ -321,8 +321,8 @@ getRecvFunc(int funcId) case TDS_RECV_DATETIMEOFFSET: return TdsRecvTypeDatetimeoffset; case TDS_RECV_GEOMETRY: - return TdsRecvTypeGeometry; - case TDS_RECV_GEOGRAPHY: + return TdsRecvTypeGeometry; + case TDS_RECV_GEOGRAPHY: return TdsRecvTypeGeography; /* TODO: should Assert here once all types are implemented */ default: @@ -2014,7 +2014,7 @@ TdsRecvTypeDatetime2(const char *message, const ParameterToken token) Datum TdsRecvTypeGeometry(const char *message, const ParameterToken token) { - Datum result = 0; + Datum result = 0; /* Decode binary and convert if needed */ StringInfo buf = TdsGetStringInfoBufferFromToken(message, token); @@ -2023,17 +2023,17 @@ TdsRecvTypeGeometry(const char *message, const ParameterToken token) ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), - errmsg("Prepared Queries for Geometry DataType Currently not Supported in BabelFish"))); + errmsg("Prepared Queries for Geometry DataType Currently not Supported in BabelFish"))); - pfree(buf); - return result; + pfree(buf); + return result; } /* ------------------------------- * TdsRecvTypeGeography - converts external binary format to - * Geography data type + * Geography data type * -------------------------------- - */ + */ /* * It is a Placeholder Function for now * TODO: Will need to address it in subsequent Code Changes @@ -2041,7 +2041,7 @@ TdsRecvTypeGeometry(const char *message, const ParameterToken token) Datum TdsRecvTypeGeography(const char *message, const ParameterToken token) { - Datum result = 0; + Datum result = 0; /* Decode binary and convert if needed */ StringInfo buf = TdsGetStringInfoBufferFromToken(message, token); @@ -2052,8 +2052,8 @@ TdsRecvTypeGeography(const char *message, const ParameterToken token) (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("Prepared Queries for Geography DataType Currently not Supported in BabelFish"))); - pfree(buf); - return result; + pfree(buf); + return result; } static inline uint128 @@ -2426,8 +2426,8 @@ TdsRecvTypeTable(const char *message, const ParameterToken token) case TDS_TYPE_SQLVARIANT: values[i] = TdsTypeSqlVariantToDatum(temp); break; - case TDS_TYPE_SPATIAL: - break; + case TDS_TYPE_SPATIAL: + break; } /* Build a string for bind parameters. */ if (colMetaData[currentColumn].columnTdsType != TDS_TYPE_NVARCHAR || row->isNull[currentColumn] == 'n') @@ -2700,10 +2700,13 @@ TdsSendTypeBinary(FmgrInfo *finfo, Datum value, void *vMetaData) maxLen = 0; bytea *vlena = DatumGetByteaPCopy(value); bytea *buf; + int copyLen = 0; TdsColumnMetaData *col = (TdsColumnMetaData *) vMetaData; maxLen = col->metaEntry.type7.maxSize; - buf = (bytea *) palloc0(sizeof(bytea) * maxLen); + copyLen = Max((sizeof(bytea) * maxLen), VARSIZE_ANY_EXHDR(vlena)); + + buf = (bytea *) palloc0(copyLen); memcpy(buf, VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena)); if ((rc = TdsPutUInt16LE(maxLen)) == 0) @@ -3139,6 +3142,7 @@ TdsSendTypeNumeric(FmgrInfo *finfo, Datum value, void *vMetaData) TdsColumnMetaData *col = (TdsColumnMetaData *) vMetaData; uint8_t max_scale = col->metaEntry.type5.scale; uint8_t max_precision = col->metaEntry.type5.precision; + int target_precision = 0; out = OutputFunctionCall(finfo, value); if (out[0] == '-') @@ -3175,24 +3179,34 @@ TdsSendTypeNumeric(FmgrInfo *finfo, Datum value, void *vMetaData) if (scale == -1) scale = 0; + /* Perform the overflow check before scribbling on to decString. */ + target_precision = precision + (max_scale - scale); + if (target_precision > TDS_MAX_NUM_PRECISION || + target_precision > max_precision) + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("Arithmetic overflow error for data type numeric."))); + /* - * Fill in the remaining 0's if the processed scale from out is less than - * max_scale This is needed because the output generated by engine may not - * always produce the same precision/scale as calculated by - * resolve_numeric_typmod_from_exp, which is the precision/scale we have - * sent to the client with column metadata. - */ + * Fill in the remaining 0's if the processed scale from out is less than + * max_scale This is needed because the output generated by engine may not + * always produce the same precision/scale as calculated by + * resolve_numeric_typmod_from_exp, which is the precision/scale we have + * sent to the client with column metadata. + */ while (scale++ < max_scale) { decString[precision++] = '0'; } decString[precision] = '\0'; - Assert(precision <= outlen); + Assert(precision == target_precision); - if (precision > TDS_MAX_NUM_PRECISION || - precision > max_precision) - ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("Arithmetic overflow error for data type numeric."))); + /* + * Verify that we did not go beyond the memory allocated. + * We allow precision < outlen. Consider the case when + * out="123.456", max_scale=8. Then by the end, precision=11 + * but outlen=15. + */ + Assert(precision <= outlen); if (precision >= 1 && precision < 10) length = 4; @@ -4150,16 +4164,16 @@ TdsSendTypeDatetimeoffset(FmgrInfo *finfo, Datum value, void *vMetaData) return rc; } -int +int TdsSendSpatialHelper(FmgrInfo *finfo, Datum value, void *vMetaData, int TdsInstr) { int rc = EOF, npoints, - len, /* number of bytes used to store the string. */ + len, /* number of bytes used to store the string. */ actualLen; /* Number of bytes that would be needed to * store given string in given encoding. */ - char *destBuf, - *buf, + char *destBuf, + *buf, *itr; int32_t srid; @@ -4175,7 +4189,7 @@ TdsSendSpatialHelper(FmgrInfo *finfo, Datum value, void *vMetaData, int TdsInstr * 16 -> 2 8-Byte float coordinates (TODO: Need to change when Z and M flags are defined for N-dimension Points) * 6 -> 4 Byte SRID + 2 Byte (01 0C) */ - len = npoints*16 + 6; + len = npoints*16 + 6; buf = (char *) palloc0(len); /* Driver Expects 4 Byte SRID */ @@ -4196,8 +4210,8 @@ TdsSendSpatialHelper(FmgrInfo *finfo, Datum value, void *vMetaData, int TdsInstr * First 8 Bytes of gser->data are fixed in PostGIS: * 4 Bytes -> Represents the Type * 4 Bytes -> Represents the npoints - */ - memcpy(itr, (char *) gser->data + 8, len - 6); + */ + memcpy(itr, (char *) gser->data + 8, len - 6); destBuf = TdsEncodingConversion(buf, len, PG_UTF8, col->encoding, &actualLen); diff --git a/contrib/babelfishpg_tsql/runtime/functions.c b/contrib/babelfishpg_tsql/runtime/functions.c index ee190bf126..19fbb0d113 100644 --- a/contrib/babelfishpg_tsql/runtime/functions.c +++ b/contrib/babelfishpg_tsql/runtime/functions.c @@ -1520,7 +1520,7 @@ object_name(PG_FUNCTION_ARGS) SysScanDesc tgscan; EphemeralNamedRelation enr; bool found = false; - char *result = NULL; + text *result_text = NULL; if (input1 < 0) PG_RETURN_NULL(); @@ -1552,9 +1552,7 @@ object_name(PG_FUNCTION_ARGS) enr = get_ENR_withoid(currentQueryEnv, object_id, ENR_TSQL_TEMP); if (enr != NULL && enr->md.enrtype == ENR_TSQL_TEMP) { - result = enr->md.name; - - PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text(result)); + PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text(enr->md.name)); } /* search in pg_class by object_id */ @@ -1565,8 +1563,7 @@ object_name(PG_FUNCTION_ARGS) if (pg_class_aclcheck(object_id, user_id, ACL_SELECT) == ACLCHECK_OK) { Form_pg_class pg_class = (Form_pg_class) GETSTRUCT(tuple); - result = NameStr(pg_class->relname); - + result_text = cstring_to_text(NameStr(pg_class->relname)); // make a copy before releasing syscache schema_id = pg_class->relnamespace; } ReleaseSysCache(tuple); @@ -1583,8 +1580,7 @@ object_name(PG_FUNCTION_ARGS) if (pg_proc_aclcheck(object_id, user_id, ACL_EXECUTE) == ACLCHECK_OK) { Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(tuple); - result = NameStr(procform->proname); - + result_text = cstring_to_text(NameStr(procform->proname)); schema_id = procform->pronamespace; } ReleaseSysCache(tuple); @@ -1602,7 +1598,7 @@ object_name(PG_FUNCTION_ARGS) if (pg_type_aclcheck(object_id, user_id, ACL_USAGE) == ACLCHECK_OK) { Form_pg_type pg_type = (Form_pg_type) GETSTRUCT(tuple); - result = NameStr(pg_type->typname); + result_text = cstring_to_text(NameStr(pg_type->typname)); } ReleaseSysCache(tuple); found = true; @@ -1630,8 +1626,7 @@ object_name(PG_FUNCTION_ARGS) if (OidIsValid(pg_trigger->tgrelid) && pg_class_aclcheck(pg_trigger->tgrelid, user_id, ACL_SELECT) == ACLCHECK_OK) { - result = NameStr(pg_trigger->tgname); - + result_text = cstring_to_text(NameStr(pg_trigger->tgname)); schema_id = get_rel_namespace(pg_trigger->tgrelid); } found = true; @@ -1651,8 +1646,7 @@ object_name(PG_FUNCTION_ARGS) /* check if user have right permission on object */ if (OidIsValid(con->conrelid) && (pg_class_aclcheck(con->conrelid, user_id, ACL_SELECT) == ACLCHECK_OK)) { - result = NameStr(con->conname); - + result_text = cstring_to_text(NameStr(con->conname)); schema_id = con->connamespace; } ReleaseSysCache(tuple); @@ -1660,7 +1654,7 @@ object_name(PG_FUNCTION_ARGS) } } - if (result) + if (result_text) { /* * Check if schema corresponding to found object belongs to specified @@ -1668,9 +1662,13 @@ object_name(PG_FUNCTION_ARGS) * "information_schema_tsql". In case of pg_type schema_id will be * invalid. */ - if (!OidIsValid(schema_id) || is_schema_from_db(schema_id, database_id) - || (schema_id == get_namespace_oid("sys", true)) || (schema_id == get_namespace_oid("information_schema_tsql", true))) - PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text(result)); + if (!OidIsValid(schema_id) || + is_schema_from_db(schema_id, database_id) || + (schema_id == get_namespace_oid("sys", true)) || + (schema_id == get_namespace_oid("information_schema_tsql", true))) + { + PG_RETURN_VARCHAR_P((VarChar *) result_text); + } } PG_RETURN_NULL(); } diff --git a/contrib/babelfishpg_tsql/src/hooks.c b/contrib/babelfishpg_tsql/src/hooks.c index a20b2f0462..7129b46f38 100644 --- a/contrib/babelfishpg_tsql/src/hooks.c +++ b/contrib/babelfishpg_tsql/src/hooks.c @@ -1759,15 +1759,16 @@ pre_transform_target_entry(ResTarget *res, ParseState *pstate, if(actual_alias_len > alias_len) { /* First 32 characters of original_name are assigned to alias. */ - memcpy(alias, original_name, (alias_len - 32) ); + memcpy(alias, original_name, (alias_len - 32)); + /* Last 32 characters of identifier_name are assigned to alias, as actual alias is truncated. */ - memcpy(alias + (alias_len) - 32, - identifier_name + (alias_len) - 32, - 32); + memcpy(alias + (alias_len - 32), + identifier_name + (alias_len - 32), + 32); + alias[alias_len] = '\0'; } - /* Identifier is not truncated. */ - else + else /* Identifier is not truncated. */ { memcpy(alias, original_name, actual_alias_len); } @@ -2636,7 +2637,13 @@ modify_insert_stmt(InsertStmt *stmt, Oid relid) if (att->attnum > 0) { - col->name = NameStr(att->attname); + /* + * Do a deep copy of attname because tuple is a pointer + * to a shared_buffer page which is released when scan + * is ended. + */ + col->name = pstrdup(NameStr(att->attname)); + col->indirection = NIL; col->val = NULL; col->location = 1; diff --git a/contrib/babelfishpg_tsql/src/pl_comp.c b/contrib/babelfishpg_tsql/src/pl_comp.c index 78fd303354..a4d4150056 100644 --- a/contrib/babelfishpg_tsql/src/pl_comp.c +++ b/contrib/babelfishpg_tsql/src/pl_comp.c @@ -465,6 +465,7 @@ do_compile(FunctionCallInfo fcinfo, PLtsql_type *argdtype; PLtsql_variable *argvariable; PLtsql_nsitem_type argitemtype; + int typmod = 0; /* Create $n name for variable */ snprintf(buf, sizeof(buf), "$%d", i + 1); @@ -496,9 +497,15 @@ do_compile(FunctionCallInfo fcinfo, function->is_mstvf = true; } - /* Create datatype info */ + /* + * Create datatype info. + * typmods array has length procStruct->pronargs and numargs >= procStruct->pronargs, + * (See Assert in get_func_arg_info()) + * Pass in typmods[i] while we are not out of bounds, otherwise pass in -1. + */ + typmod = (typmods && i < procStruct->pronargs) ? typmods[i] : -1; argdtype = pltsql_build_datatype(argtypeid, - (typmods ? typmods[i] : -1), + typmod, function->fn_input_collation, NULL); diff --git a/contrib/babelfishpg_tsql/src/pl_handler.c b/contrib/babelfishpg_tsql/src/pl_handler.c index 100ec27743..9d798d06a6 100644 --- a/contrib/babelfishpg_tsql/src/pl_handler.c +++ b/contrib/babelfishpg_tsql/src/pl_handler.c @@ -3498,7 +3498,7 @@ bbf_ProcessUtility(PlannedStmt *pstmt, Constraint *con; con = get_rowversion_default_constraint(makeTypeNameFromOid(attr->atttypid, attr->atttypmod)); - rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault)); + rawEnt = (RawColumnDefault *) palloc0(sizeof(RawColumnDefault)); rawEnt->attnum = attr_num + 1; rawEnt->raw_default = (Node *) con->raw_expr; AddRelationNewConstraints(rel, list_make1(rawEnt), NIL, diff --git a/contrib/babelfishpg_tsql/src/rolecmds.c b/contrib/babelfishpg_tsql/src/rolecmds.c index 40a574039c..eb30b2dfee 100644 --- a/contrib/babelfishpg_tsql/src/rolecmds.c +++ b/contrib/babelfishpg_tsql/src/rolecmds.c @@ -2136,8 +2136,6 @@ babelfish_add_domain_mapping_entry_internal(PG_FUNCTION_ARGS) new_record = palloc0(sizeof(Datum) * BBF_DOMAIN_MAPPING_NUM_COLS); new_record_nulls = palloc0(sizeof(bool) * BBF_DOMAIN_MAPPING_NUM_COLS); - MemSet(new_record_nulls, false, sizeof(new_record_nulls)); - new_record[0] = PG_GETARG_DATUM(0); new_record[1] = PG_GETARG_DATUM(1); diff --git a/contrib/babelfishpg_tsql/src/string.c b/contrib/babelfishpg_tsql/src/string.c index 40681a5df3..6126e60768 100644 --- a/contrib/babelfishpg_tsql/src/string.c +++ b/contrib/babelfishpg_tsql/src/string.c @@ -407,8 +407,6 @@ prepare_format_string(StringInfo buf, char *msg_string, int nargs, strncpy(fmt_seg, seg_start, seg_len); prev_fmt_seg_sz = seg_len; - arg = args[i]; - if (i >= nargs || argisnull[i]) { appendStringInfo(buf, "(null)"); @@ -416,6 +414,7 @@ prepare_format_string(StringInfo buf, char *msg_string, int nargs, continue; } + arg = args[i]; typid = argtypes[i]; type = TypeCategory(typid);