Skip to content

Commit

Permalink
[clang-tidy] Only expand <inttypes.h> macros in modernize-use-std-for…
Browse files Browse the repository at this point in the history
…mat/print

Expanding all macros in the printf/absl::StrFormat format string before
conversion could easily break code if those macros are expended to
change their definition between builds. It's important for this check to
expand the <inttypes.h> PRI macros though, so let's ensure that the
presence of any other macros in the format string causes the check to
emit a warning and not perform any conversion.
  • Loading branch information
mikecrowe committed Oct 5, 2024
1 parent e8f01b0 commit 2dd0902
Show file tree
Hide file tree
Showing 12 changed files with 257 additions and 41 deletions.
7 changes: 4 additions & 3 deletions clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ void UseStdFormatCheck::registerPPCallbacks(const SourceManager &SM,
Preprocessor *PP,
Preprocessor *ModuleExpanderPP) {
IncludeInserter.registerPreprocessor(PP);
this->PP = PP;
}

void UseStdFormatCheck::registerMatchers(MatchFinder *Finder) {
Expand Down Expand Up @@ -75,9 +76,9 @@ void UseStdFormatCheck::check(const MatchFinder::MatchResult &Result) {

utils::FormatStringConverter::Configuration ConverterConfig;
ConverterConfig.StrictMode = StrictMode;
utils::FormatStringConverter Converter(Result.Context, StrFormat,
FormatArgOffset, ConverterConfig,
getLangOpts());
utils::FormatStringConverter Converter(
Result.Context, StrFormat, FormatArgOffset, ConverterConfig,
getLangOpts(), *Result.SourceManager, *PP);
const Expr *StrFormatCall = StrFormat->getCallee();
if (!Converter.canApply()) {
diag(StrFormat->getBeginLoc(),
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class UseStdFormatCheck : public ClangTidyCheck {
StringRef ReplacementFormatFunction;
utils::IncludeInserter IncludeInserter;
std::optional<StringRef> MaybeHeaderToInclude;
Preprocessor *PP = nullptr;
};

} // namespace clang::tidy::modernize
Expand Down
4 changes: 3 additions & 1 deletion clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ void UseStdPrintCheck::registerPPCallbacks(const SourceManager &SM,
Preprocessor *PP,
Preprocessor *ModuleExpanderPP) {
IncludeInserter.registerPreprocessor(PP);
this->PP = PP;
}

static clang::ast_matchers::StatementMatcher
Expand Down Expand Up @@ -131,7 +132,8 @@ void UseStdPrintCheck::check(const MatchFinder::MatchResult &Result) {
ConverterConfig.StrictMode = StrictMode;
ConverterConfig.AllowTrailingNewlineRemoval = true;
utils::FormatStringConverter Converter(
Result.Context, Printf, FormatArgOffset, ConverterConfig, getLangOpts());
Result.Context, Printf, FormatArgOffset, ConverterConfig, getLangOpts(),
*Result.SourceManager, *PP);
const Expr *PrintfCall = Printf->getCallee();
const StringRef ReplacementFunction = Converter.usePrintNewlineFunction()
? ReplacementPrintlnFunction
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class UseStdPrintCheck : public ClangTidyCheck {
}

private:
Preprocessor *PP;
bool StrictMode;
std::vector<StringRef> PrintfLikeFunctions;
std::vector<StringRef> FprintfLikeFunctions;
Expand Down
65 changes: 60 additions & 5 deletions clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/FixIt.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -195,11 +196,10 @@ static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
return false;
}

FormatStringConverter::FormatStringConverter(ASTContext *ContextIn,
const CallExpr *Call,
unsigned FormatArgOffset,
const Configuration ConfigIn,
const LangOptions &LO)
FormatStringConverter::FormatStringConverter(
ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
Preprocessor &PP)
: Context(ContextIn), Config(ConfigIn),
CastMismatchedIntegerTypes(
castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
Expand All @@ -208,11 +208,22 @@ FormatStringConverter::FormatStringConverter(ASTContext *ContextIn,
assert(ArgsOffset <= NumArgs);
FormatExpr = llvm::dyn_cast<StringLiteral>(
Args[FormatArgOffset]->IgnoreImplicitAsWritten());

if (!FormatExpr || !FormatExpr->isOrdinary()) {
// Function must have a narrow string literal as its first argument.
conversionNotPossible("first argument is not a narrow string literal");
return;
}

if (const std::optional<StringRef> MaybeMacroName =
formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
MaybeMacroName) {
conversionNotPossible(
("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
.str());
return;
}

PrintfFormatString = FormatExpr->getString();

// Assume that the output will be approximately the same size as the input,
Expand All @@ -230,6 +241,50 @@ FormatStringConverter::FormatStringConverter(ASTContext *ContextIn,
finalizeFormatText();
}

std::optional<StringRef>
FormatStringConverter::formatStringContainsUnreplaceableMacro(
const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
Preprocessor &PP) {
// If a macro invocation surrounds the entire call then we don't want that to
// inhibit conversion. The whole format string will appear to come from that
// macro, as will the function call.
std::optional<StringRef> MaybeSurroundingMacroName;
if (SourceLocation BeginCallLoc = Call->getBeginLoc();
BeginCallLoc.isMacroID())
MaybeSurroundingMacroName =
Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());

for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
I != E; ++I) {
const SourceLocation &TokenLoc = *I;
if (TokenLoc.isMacroID()) {
const StringRef MacroName =
Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());

if (MaybeSurroundingMacroName != MacroName) {
// glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
// for types that change size so we must look for multiple prefixes.
if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
return MacroName;

const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
const OptionalFileEntryRef MaybeFileEntry =
SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
if (!MaybeFileEntry)
return MacroName;

HeaderSearch &HS = PP.getHeaderSearchInfo();
// Check if the file is a system header
if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
llvm::sys::path::filename(MaybeFileEntry->getName()) !=
"inttypes.h")
return MacroName;
}
}
}
return std::nullopt;
}

void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
std::string &FormatSpec) {
ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
Expand Down
7 changes: 6 additions & 1 deletion clang-tools-extra/clang-tidy/utils/FormatStringConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ class FormatStringConverter

FormatStringConverter(ASTContext *Context, const CallExpr *Call,
unsigned FormatArgOffset, Configuration Config,
const LangOptions &LO);
const LangOptions &LO, SourceManager &SM,
Preprocessor &PP);

bool canApply() const { return ConversionNotPossibleReason.empty(); }
const std::string &conversionNotPossibleReason() const {
Expand Down Expand Up @@ -110,6 +111,10 @@ class FormatStringConverter

void appendFormatText(StringRef Text);
void finalizeFormatText();
static std::optional<StringRef>
formatStringContainsUnreplaceableMacro(const CallExpr *CallExpr,
const StringLiteral *FormatExpr,
SourceManager &SM, Preprocessor &PP);
bool conversionNotPossible(std::string Reason) {
ConversionNotPossibleReason = std::move(Reason);
return false;
Expand Down
10 changes: 10 additions & 0 deletions clang-tools-extra/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,20 @@ Changes in existing checks
<clang-tidy/checks/modernize/use-std-format>` check to support replacing
member function calls too.

- Improved :doc:`modernize-use-std-format
<clang-tidy/checks/modernize/use-std-format>` check to only expand macros
starting with ``PRI`` and ``__PRI`` from ``<inttypes.h>`` in the format
string.

- Improved :doc:`modernize-use-std-print
<clang-tidy/checks/modernize/use-std-print>` check to support replacing
member function calls too.

- Improved :doc:`modernize-use-std-print
<clang-tidy/checks/modernize/use-std-print>` check to only expand macros
starting with ``PRI`` and ``__PRI`` from ``<inttypes.h>`` in the format
string.

- Improved :doc:`performance-avoid-endl
<clang-tidy/checks/performance/avoid-endl>` check to use ``std::endl`` as
placeholder when lexer cannot get source text.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ into:

The check uses the same format-string-conversion algorithm as
`modernize-use-std-print <../modernize/use-std-print.html>`_ and its
shortcomings are described in the documentation for that check.
shortcomings and behaviour in combination with macros are described in the
documentation for that check.

Options
-------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,15 @@ into:
std::println(stderr, "The {} is {:3}", description, value);

If the `ReplacementPrintFunction` or `ReplacementPrintlnFunction` options
are left, or assigned to their default values then this check is only
enabled with `-std=c++23` or later.
are left at or set to their default values then this check is only enabled
with `-std=c++23` or later.

Macros starting with ``PRI`` and ``__PRI`` from `<inttypes.h>` are
expanded, escaping is handled and adjacent strings are concatenated to form
a single ``StringLiteral`` before the format string is converted. Use of
any other macros in the format string will cause a warning message to be
emitted and no conversion will be performed. The converted format string
will always be a single string literal.

The check doesn't do a bad job, but it's not perfect. In particular:

Expand All @@ -34,13 +41,10 @@ The check doesn't do a bad job, but it's not perfect. In particular:
possible.

- At the point that the check runs, the AST contains a single
``StringLiteral`` for the format string and any macro expansion, token
pasting, adjacent string literal concatenation and escaping has been
handled. Although it's possible for the check to automatically put the
escapes back, they may not be exactly as they were written (e.g.
``"\x0a"`` will become ``"\n"`` and ``"ab" "cd"`` will become
``"abcd"``.) This is helpful since it means that the ``PRIx`` macros from
``<inttypes.h>`` are removed correctly.
``StringLiteral`` for the format string where escapes have been expanded.
The check tries to reconstruct escape sequences, they may not be the same
as they were written (e.g. ``"\x41\x0a"`` will become ``"A\n"`` and
``"ab" "cd"`` will become ``"abcd"``.)

- It supports field widths, precision, positional arguments, leading zeros,
leading ``+``, alignment and alternative forms.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,40 +21,46 @@ typedef __UINT32_TYPE__ uint32_t;
typedef __UINT16_TYPE__ uint16_t;
typedef __UINT8_TYPE__ uint8_t;

#define PRIdMAX "lld"
#define PRId64 "lld"
#if __WORDSIZE == 64
# define __PRI64_PREFIX "l"
#else
# define __PRI64_PREFIX "ll"
#endif

#define PRIdMAX __PRI64_PREFIX "d"
#define PRId64 __PRI64_PREFIX "d"
#define PRId32 "d"
#define PRId16 "hd"
#define PRId8 "hhd"

#define PRIiMAX "lli"
#define PRIi64 "lli"
#define PRIiMAX __PRI64_PREFIX "i"
#define PRIi64 __PRI64_PREFIX "i"
#define PRIi32 "i"
#define PRIi16 "hi"
#define PRIi8 "hhi"

#define PRIiFAST64 "lli"
#define PRIiFAST64 __PRI64_PREFIX "i"
#define PRIiFAST32 "i"
#define PRIiFAST16 "hi"
#define PRIiFAST8 "hhi"

#define PRIiLEAST64 "lli"
#define PRIiLEAST64 __PRI64_PREFIX "i"
#define PRIiLEAST32 "i"
#define PRIiLEAST16 "hi"
#define PRIiLEAST8 "hhi"

#define PRIuMAX "llu"
#define PRIu64 "llu"
#define PRIuMAX __PRI64_PREFIX "u"
#define PRIu64 __PRI64_PREFIX "u"
#define PRIu32 "u"
#define PRIu16 "hu"
#define PRIu8 "hhu"

#define PRIuFAST64 "llu"
#define PRIuFAST64 __PRI64_PREFIX "u"
#define PRIuFAST32 "u"
#define PRIuFAST16 "hu"
#define PRIuFAST8 "hhu"

#define PRIuLEAST64 "llu"
#define PRIuLEAST64 __PRI64_PREFIX "u"
#define PRIuLEAST32 "u"
#define PRIuLEAST16 "hu"
#define PRIuLEAST8 "hhu"
Expand Down
Loading

0 comments on commit 2dd0902

Please sign in to comment.