Skip to content

Commit

Permalink
feat(ibm-use-date-based-format): introduce new validation rule
Browse files Browse the repository at this point in the history
This commit introduces the new 'ibm-use-date-based-format' rule,
which will heuristically verify that schemas, with either a name
or an example value indicating a date-based logical type, be
strings and use either "date" or "date-time" as the format.

Signed-off-by: Dustin Popp <[email protected]>
  • Loading branch information
dpopp07 committed Dec 19, 2024
1 parent 1ea4d57 commit 7856f0a
Show file tree
Hide file tree
Showing 13 changed files with 4,649 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"files": "package-lock.json|^.secrets.baseline$",
"lines": null
},
"generated_at": "2024-12-16T19:27:38Z",
"generated_at": "2024-12-19T16:14:03Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down
1 change: 1 addition & 0 deletions packages/ruleset/src/functions/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ module.exports = {
unevaluatedProperties: require('./unevaluated-properties'),
uniqueParameterRequestPropertyNames: require('./unique-parameter-request-property-names'),
unusedTags: require('./unused-tags'),
useDateBasedFormat: require('./use-date-based-format'),
validatePathSegments: require('./valid-path-segments'),
wellDefinedDictionaries: require('./well-defined-dictionaries'),
};
2 changes: 1 addition & 1 deletion packages/ruleset/src/functions/no-ambiguous-paths.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ module.exports = function (paths, _options, context) {
* 1. "/v1/clouds/{id}", "/v1/clouds/{cloud_id}"
* 2. "/v1/clouds/foo", "/v1/clouds/{cloud_id}"
* 3. "/v1/{resource_type}/foo", "/v1/users/{user_id}"
* @param {*} apidef the entire API definition
* @param {*} paths map containing all path objects
* @returns an array containing zero or more error objects
*/
function checkAmbiguousPaths(paths) {
Expand Down
353 changes: 353 additions & 0 deletions packages/ruleset/src/functions/use-date-based-format.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,353 @@
/**
* Copyright 2024 IBM Corporation.
* SPDX-License-Identifier: Apache2.0
*/

const {
isArraySchema,
isDateSchema,
isDateTimeSchema,
isIntegerSchema,
isObject,
isObjectSchema,
isStringSchema,
schemaHasConstraint,
validateSubschemas,
} = require('@ibm-cloud/openapi-ruleset-utilities');

const {
LoggerFactory,
isDateBasedName,
isDateBasedValue,
isParamContentSchema,
isParamSchema,
isRequestBodySchema,
isResponseSchema,
isSchemaProperty,
} = require('../utils');

let ruleId;
let logger;

/**
* The implementation for this rule makes assumptions that are dependent on the
* presence of the following other rules:
*
* - oas3-valid-media-example
* - oas3-valid-schema-example
*
* These rules verify that the correct, specific format (date vs date-time) is
* used for schemas based on their example value. So, we aren't as specific
* with that check in this rule - we recommend either "date" or "date-time".
*/

module.exports = function (schema, _opts, context) {
if (!logger) {
ruleId = context.rule.name;
logger = LoggerFactory.getInstance().getLogger(ruleId);
}

return checkForDateBasedFormat(
schema,
context.path,
context.documentInventory.resolved
);
};

/**
 * Walks the given schema (via `validateSubschemas`) and reports schemas that
 * appear to be date-based - judged by property name or by example value - but
 * are not declared with the "date" or "date-time" format.
 *
 * While visiting schemas, two pieces of state are accumulated and shared by
 * every visit:
 *   - a list of candidate examples (schema-level examples from object/array
 *     schemas, plus any parameter/media type examples located next to the
 *     schema), each tagged with the property path it was found at
 *   - a running property path, including '[]' and '{}' sentinels for arrays
 *     and dictionaries, used later to locate a property's value inside an
 *     example defined on one of its ancestors
 *
 * @param {object} s the schema to check
 * @param {array} p the array of path segments indicating the "location" of the schema within the API definition
 * @param {object} apidef the resolved API definition
 * @returns an array containing the violations found or [] if no violations
 */
function checkForDateBasedFormat(s, p, apidef) {
  const collectedExamples = [];
  const runningPropertyPath = [];

  // Bookkeeping for object and array schemas. The order of the steps matters:
  // the schema's example must be recorded *before* the sentinels are added,
  // so that the example's path points at the schema itself.
  const trackCompositeSchema = (schema, path) => {
    if (isSchemaProperty(path)) {
      runningPropertyPath.push(path.at(-1));
    }

    if (schema.example) {
      logger.debug(
        `${ruleId}: adding example for schema at location: ${path.join('.')}`
      );

      // Store a copy of the path, since the running path keeps changing.
      collectedExamples.push({
        example: schema.example,
        examplePath: [...runningPropertyPath],
      });
    }

    if (isSchemaProperty(path)) {
      if (isArraySchema(schema)) {
        runningPropertyPath.push('[]');
      }

      if (isDictionarySchema(schema)) {
        runningPropertyPath.push('{}');
      }
    }
  };

  const visitSchema = (schema, path) => {
    logger.debug(`${ruleId}: checking schema at location: ${path.join('.')}`);

    // Examples may also live outside of the schema itself: in request bodies,
    // response bodies, or parameters.
    collectedExamples.push(...checkForIndirectExamples(path, apidef));

    // Primitive schemas need no bookkeeping; objects and arrays do, because
    // their examples may hold values for nested properties.
    if (isObjectSchema(schema) || isArraySchema(schema)) {
      trackCompositeSchema(schema, path);
    }

    // Hand over a copy of the running path so the validation logic can't
    // alter the shared one.
    return performValidation(
      schema,
      path,
      apidef,
      [...runningPropertyPath],
      collectedExamples
    );
  };

  return validateSubschemas(s, p, visitSchema);
}

// This function performs the actual checks against a single schema to decide
// whether it should be a "date" or "date-time" schema but isn't defined as
// one. Two heuristics are applied, in order:
//   1) the name of the property (when the schema is a property schema)
//   2) the example value, taken from the schema itself or, failing that,
//      looked up in the list of examples gathered so far (string schemas only)
//
// Arguments:
//   schema       - the schema to check
//   path         - array of path segments locating the schema in the API
//   apidef       - the resolved API definition (currently unused here)
//   propertyPath - running path of property names/sentinels leading to this
//                  schema; it is NOT modified by this function
//   examples     - list of { example, examplePath } objects gathered so far
//
// Returns an array holding a single violation, or [] if there is none.
function performValidation(schema, path, apidef, propertyPath, examples) {
  // If this is already a date or date-time schema, no need to check if it should be.
  if (isDateSchema(schema) || isDateTimeSchema(schema)) {
    logger.debug(
      `${ruleId}: skipping date-based schema at location: ${path.join('.')}`
    );

    return [];
  }

  // Check for a name that would indicate the property should be date-based.
  const isProperty = isSchemaProperty(path);
  const hasDateTimeName = isProperty && isDateBasedName(path.at(-1));

  if (hasDateTimeName) {
    // Only report a date-based name when one was actually detected.
    logger.debug(
      `${ruleId}: property at location: ${path.join('.')} has a date-based name`
    );

    if (isStringSchema(schema) || isIntegerSchema(schema)) {
      // If the schema is determined to be a date-time schema by the name alone,
      // we can return - no need to look for an example value.
      return [
        {
          message:
            'According to its name, this property should use type "string" and format "date" or "date-time"',
          path,
        },
      ];
    }
  }

  // Example values are only checked for string schemas.
  if (!isStringSchema(schema)) {
    return [];
  }

  // If this is a property, its own name is the last segment of the lookup
  // path. Build a new array rather than mutating the caller's.
  const lookupPath = isProperty
    ? [...propertyPath, path.at(-1)]
    : propertyPath;

  // Either use the schema example directly or search the list of examples
  // for an example object that contains a value for this property.
  const exampleValue = schema.example || findExample(lookupPath, examples);
  if (exampleValue) {
    logger.debug(
      `${ruleId}: example value found for string schema at location ${path.join(
        '.'
      )}: ${exampleValue}`
    );

    if (isDateBasedValue(exampleValue)) {
      return [
        {
          message:
            'According to its example value, this schema should use type "string" and format "date" or "date-time"',
          path,
        },
      ];
    }
  }

  return [];
}

// This function checks all of the examples we've gathered while processing
// schemas to check if one of them defines a value for the specific property
// or string schema that we are looking at. It returns the first (truthy) value
// found, or `undefined` if none of the examples holds a value.
//
// Arguments:
//   propertyPath - path of property names/sentinels leading to the schema
//   examples     - list of { example, examplePath } objects to search
function findExample(propertyPath, examples) {
  // According to the OpenAPI specification, Media Type/Parameter examples
  // override any examples defined on the schemas themselves. Going "in order"
  // through this loop ensures we prioritize those examples, followed by
  // higher-level schema examples. If it turns out that we should prioritize
  // nested examples, we can simply reverse this loop.
  for (const { example, examplePath } of examples) {
    // First thing is to find the relevant segment of the property path relative
    // to the example path, which should be the first element where they differ.
    // NOTE(review): when no element differs, `findIndex` yields -1 and the
    // slice below keeps only the last path segment (or nothing, for an empty
    // path) - confirm this is the intended lookup for that case.
    const index = propertyPath.findIndex((prop, i) => prop !== examplePath[i]);
    const value = getObjectValueAtPath(example, propertyPath.slice(index));

    // The first value found wins - no need to look at the remaining examples.
    if (value) {
      logger.debug(
        `${ruleId}: value found in example at location: ${examplePath.join(
          '.'
        )}`
      );

      return value;
    }
  }

  // Only reached when none of the examples held a value.
  logger.debug(
    `${ruleId}: no example value found for schema at location: ${propertyPath.join(
      '.'
    )}`
  );

  return undefined;
}

// This function takes an object, as well as a path to a specific value, and
// recursively parses the object looking for the value at that path. If it
// finds one, the value will be returned. If not (including when a `null` or
// `undefined` value is hit before the path is exhausted), the function will
// return `undefined`. Note that only truthy values are followed/returned for
// regular path segments. One important note is that the array given as the
// `pathToValue` argument *will* be modified by the logic, so if that is not
// desired, a copy should be passed by the caller (using .slice(), for example).
//
// Two path segments are treated as sentinels rather than property names:
//   '[]' - descend into the first element of an array
//   '{}' - descend into the first value of a dictionary object
function getObjectValueAtPath(obj, pathToValue) {
  // Nothing left to traverse: whatever we are holding is the value.
  if (!pathToValue.length) {
    return obj;
  }

  // There is more path to follow but nothing to descend into. Bail out here,
  // since property access on null/undefined would throw a TypeError.
  if (obj === null || obj === undefined) {
    return undefined;
  }

  const p = pathToValue.shift();

  // Check for sentinel indicating an array.
  if (p === '[]' && Array.isArray(obj) && obj.length) {
    return getObjectValueAtPath(obj[0], pathToValue);
  }

  // Check for sentinel indicating a dictionary.
  if (p === '{}' && isObject(obj) && Object.values(obj).length) {
    return getObjectValueAtPath(Object.values(obj)[0], pathToValue);
  }

  // Standard model path.
  if (obj[p]) {
    return getObjectValueAtPath(obj[p], pathToValue);
  }

  // Return undefined if we don't find anything.
  return undefined;
}

// "Indirect" examples are those coming from request bodies, response bodies, and parameters.
function checkForIndirectExamples(path, apidef) {
  // Returns a list of { example, examplePath } objects for any examples
  // defined next to the schema at `path`, or [] when the schema is not in a
  // request body, response body, or parameter location.

  // Parameter and Media Type objects share the same layout for examples, so
  // every one of these locations can be handled identically.
  const isIndirectLocation =
    isRequestBodySchema(path) ||
    isResponseSchema(path) ||
    isParamSchema(path) ||
    isParamContentSchema(path);

  if (!isIndirectLocation) {
    return [];
  }

  // The example fields are siblings of the schema, so look at the object found
  // at the path with its final segment ("schema") dropped.
  const schemaParent = getObjectValueAtPath(apidef, path.slice(0, -1));
  const found = getOpenApiExamples(schemaParent);

  // Special case: for a parameter's content schema, the parameter itself may
  // define an example as well. The parameter sits three segments up the path.
  if (isParamContentSchema(path)) {
    const parameter = getObjectValueAtPath(apidef, path.slice(0, -3));
    found.push(...getOpenApiExamples(parameter));
  }

  logger.debug(
    `${ruleId}: ${
      found.length
    } indirect examples found for schema at location: ${path.join('.')}`
  );

  // Shape the examples the way the rest of this rule expects them. These are
  // all top-level examples, so each one gets an empty path.
  return found.map(example => ({ example, examplePath: [] }));
}

// OpenAPI defines its own example structure, separate from schema examples,
// on Parameter and Media Type objects. Use this function to parse those
// structures and return any relevant examples. The argument may be either a
// Parameter or Media Type object (anything else yields an empty list) and the
// function always returns a list. Entries of the `examples` map that carry no
// inline `value` (e.g. ones that only use `externalValue`), or that are not
// objects at all, are left out of the result.
function getOpenApiExamples(artifact) {
  if (!isObject(artifact)) {
    return [];
  }

  // The `example` and `examples` fields are mutually exclusive.
  if (artifact.example) {
    return [artifact.example];
  }

  // This will be a map, potentially containing multiple examples. Return all
  // of the ones that actually hold an inline value.
  if (artifact.examples) {
    return Object.values(artifact.examples)
      .map(exampleObject => exampleObject?.value)
      .filter(value => value !== undefined && value !== null);
  }

  return [];
}

// Decides whether a schema describes a "dictionary" - an object with dynamic
// keys - rather than a standard model with static properties. A schema counts
// as a dictionary when it is an object schema that defines either
// `additionalProperties` or `patternProperties` (OpenAPI 3.1 only). The check
// itself is delegated to `schemaHasConstraint`.
function isDictionarySchema(schema) {
  const definesDynamicProperties = candidate => {
    const objectCheck = isObjectSchema(candidate);
    if (!objectCheck) {
      return objectCheck;
    }

    return candidate.additionalProperties || candidate.patternProperties;
  };

  return schemaHasConstraint(schema, definesDynamicProperties);
}
1 change: 1 addition & 0 deletions packages/ruleset/src/ibm-oas.js
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ module.exports = {
'ibm-unevaluated-properties': ibmRules.unevaluatedProperties,
'ibm-unique-parameter-request-property-names':
ibmRules.uniqueParameterRequestPropertyNames,
'ibm-use-date-based-format': ibmRules.useDateBasedFormat,
'ibm-valid-path-segments': ibmRules.validPathSegments,
'ibm-well-defined-dictionaries': ibmRules.wellDefinedDictionaries,
},
Expand Down
1 change: 1 addition & 0 deletions packages/ruleset/src/rules/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ module.exports = {
unevaluatedProperties: require('./unevaluated-properties'),
unusedTags: require('./unused-tags'),
uniqueParameterRequestPropertyNames: require('./unique-parameter-request-property-names'),
useDateBasedFormat: require('./use-date-based-format'),
validPathSegments: require('./valid-path-segments'),
wellDefinedDictionaries: require('./well-defined-dictionaries'),
};
Loading

0 comments on commit 7856f0a

Please sign in to comment.