Skip to content

Commit

Permalink
Add support for unicode names (any character, including emojis) for object/group/behavior/extension/function names
Browse files Browse the repository at this point in the history

* Also fix GetExtensionCodeNamespacePrefix not taking its parameter by const reference (useless copy being made at each call)
* Also fix extension code generation from the editor not generating the proper namespaces for unicode names
  • Loading branch information
4ian committed Aug 13, 2023
1 parent 816c03e commit 91cc4c8
Show file tree
Hide file tree
Showing 19 changed files with 405 additions and 246 deletions.
2 changes: 2 additions & 0 deletions Core/GDCore/Events/Parsers/ExpressionParser2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
#include "GDCore/Project/Project.h"
#include "GDCore/Tools/Localization.h"
#include "GDCore/Tools/MakeUnique.h"
#include "GrammarTerminals.h"

using namespace std;
using namespace gd::GrammarTerminals;

namespace gd {

Expand Down
96 changes: 5 additions & 91 deletions Core/GDCore/Events/Parsers/ExpressionParser2.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "GDCore/String.h"
#include "GDCore/Tools/Localization.h"
#include "GDCore/Tools/MakeUnique.h"
#include "GrammarTerminals.h"
namespace gd {
class Expression;
class ObjectsContainer;
Expand All @@ -28,6 +29,8 @@ class ExpressionMetadata;

namespace gd {

using namespace gd::GrammarTerminals;

/** \brief Parse an expression, returning a tree of node corresponding
* to the parsed expression.
*
Expand Down Expand Up @@ -211,7 +214,7 @@ class GD_CORE_API ExpressionParser2 {
}
SkipIfChar(IsClosingParenthesis);
return factor;
} else if (IsIdentifierAllowedChar()) {
} else if (CheckIfChar(IsAllowedInIdentifier)) {
return Identifier();
}

Expand Down Expand Up @@ -606,95 +609,6 @@ class GD_CORE_API ExpressionParser2 {
return predicate(character);
}

bool IsIdentifierAllowedChar() {
if (currentPosition >= expression.size()) return false;
gd::String::value_type character = expression[currentPosition];

// Quickly compare if the character is a number or ASCII character.
if ((character >= '0' && character <= '9') ||
(character >= 'A' && character <= 'Z') ||
(character >= 'a' && character <= 'z'))
return true;

// Otherwise do the full check against separators forbidden in identifiers.
if (!IsParameterSeparator(character) && !IsDot(character) &&
!IsQuote(character) && !IsBracket(character) &&
!IsExpressionOperator(character) && !IsTermOperator(character)) {
return true;
}

return false;
}

static bool IsWhitespace(gd::String::value_type character) {
return character == ' ' || character == '\n' || character == '\r';
}

static bool IsParameterSeparator(gd::String::value_type character) {
return character == ',';
}

static bool IsDot(gd::String::value_type character) {
return character == '.';
}

static bool IsQuote(gd::String::value_type character) {
return character == '"';
}

static bool IsBracket(gd::String::value_type character) {
return character == '(' || character == ')' || character == '[' ||
character == ']' || character == '{' || character == '}';
}

static bool IsOpeningParenthesis(gd::String::value_type character) {
return character == '(';
}

static bool IsClosingParenthesis(gd::String::value_type character) {
return character == ')';
}

static bool IsOpeningSquareBracket(gd::String::value_type character) {
return character == '[';
}

static bool IsClosingSquareBracket(gd::String::value_type character) {
return character == ']';
}

static bool IsExpressionEndingChar(gd::String::value_type character) {
return character == ',' || IsClosingParenthesis(character) ||
IsClosingSquareBracket(character);
}

static bool IsExpressionOperator(gd::String::value_type character) {
return character == '+' || character == '-' || character == '<' ||
character == '>' || character == '?' || character == '^' ||
character == '=' || character == '\\' || character == ':' ||
character == '!';
}

static bool IsUnaryOperator(gd::String::value_type character) {
return character == '+' || character == '-';
}

static bool IsTermOperator(gd::String::value_type character) {
return character == '/' || character == '*';
}

static bool IsNumberFirstChar(gd::String::value_type character) {
return character == '.' || (character >= '0' && character <= '9');
}

static bool IsNonZeroDigit(gd::String::value_type character) {
return (character >= '1' && character <= '9');
}

static bool IsZeroDigit(gd::String::value_type character) {
return character == '0';
}

bool IsNamespaceSeparator() {
// Namespace separator is a special kind of delimiter as it is 2 characters
// long
Expand All @@ -715,7 +629,7 @@ class GD_CORE_API ExpressionParser2 {
gd::String name;
size_t startPosition = currentPosition;
while (currentPosition < expression.size() &&
(IsIdentifierAllowedChar()
(CheckIfChar(IsAllowedInIdentifier)
// Allow whitespace in identifier name for compatibility
|| expression[currentPosition] == ' ')) {
name += expression[currentPosition];
Expand Down
107 changes: 107 additions & 0 deletions Core/GDCore/Events/Parsers/GrammarTerminals.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#pragma once
#include "GDCore/String.h"

namespace gd {

/**
* Contains functions to handle the grammar of the expressions accepted by GDevelop.
*/
namespace GrammarTerminals {

/**
 * Tell whether the character is a whitespace for the grammar: a space,
 * a line break or a carriage return.
 */
inline bool IsWhitespace(gd::String::value_type character) {
  switch (character) {
    case ' ':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}

/** Tell whether the character is a comma (`,`). */
inline bool IsParameterSeparator(gd::String::value_type character) {
  const gd::String::value_type comma = ',';
  return character == comma;
}

/** Tell whether the character is a dot (`.`). */
inline bool IsDot(gd::String::value_type character) {
  const gd::String::value_type dot = '.';
  return character == dot;
}

/** Tell whether the character is a double quote (`"`). */
inline bool IsQuote(gd::String::value_type character) {
  const gd::String::value_type doubleQuote = '"';
  return character == doubleQuote;
}

/**
 * Tell whether the character is an opening or closing parenthesis,
 * square bracket or curly bracket.
 */
inline bool IsBracket(gd::String::value_type character) {
  switch (character) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}

/** Tell whether the character is an opening parenthesis (`(`). */
inline bool IsOpeningParenthesis(gd::String::value_type character) {
  const gd::String::value_type openingParenthesis = '(';
  return character == openingParenthesis;
}

/** Tell whether the character is a closing parenthesis (`)`). */
inline bool IsClosingParenthesis(gd::String::value_type character) {
  const gd::String::value_type closingParenthesis = ')';
  return character == closingParenthesis;
}

/** Tell whether the character is an opening square bracket (`[`). */
inline bool IsOpeningSquareBracket(gd::String::value_type character) {
  const gd::String::value_type openingSquareBracket = '[';
  return character == openingSquareBracket;
}

/** Tell whether the character is a closing square bracket (`]`). */
inline bool IsClosingSquareBracket(gd::String::value_type character) {
  const gd::String::value_type closingSquareBracket = ']';
  return character == closingSquareBracket;
}

/**
 * Tell whether the character is one of the characters ending an expression:
 * a comma, a closing parenthesis or a closing square bracket.
 */
inline bool IsExpressionEndingChar(gd::String::value_type character) {
  if (character == ',') return true;
  if (IsClosingParenthesis(character)) return true;

  return IsClosingSquareBracket(character);
}

/**
 * Tell whether the character is one of the expression operators:
 * `+ - < > ? ^ = \ : !`.
 */
inline bool IsExpressionOperator(gd::String::value_type character) {
  switch (character) {
    case '+':
    case '-':
    case '<':
    case '>':
    case '?':
    case '^':
    case '=':
    case '\\':
    case ':':
    case '!':
      return true;
    default:
      return false;
  }
}

/** Tell whether the character is a unary operator: `+` or `-`. */
inline bool IsUnaryOperator(gd::String::value_type character) {
  const bool isPlus = character == '+';
  const bool isMinus = character == '-';
  return isPlus || isMinus;
}

/** Tell whether the character is a term operator: `/` or `*`. */
inline bool IsTermOperator(gd::String::value_type character) {
  switch (character) {
    case '/':
    case '*':
      return true;
    default:
      return false;
  }
}

/**
 * Tell whether the character can be the first character of a number:
 * a dot or an ASCII digit (`0` to `9`).
 */
inline bool IsNumberFirstChar(gd::String::value_type character) {
  if (character == '.') return true;

  return character >= '0' && character <= '9';
}

/** Tell whether the character is an ASCII digit between `1` and `9`. */
inline bool IsNonZeroDigit(gd::String::value_type character) {
  const bool isBelowRange = character < '1';
  const bool isAboveRange = character > '9';
  return !isBelowRange && !isAboveRange;
}

/** Tell whether the character is the digit `0`. */
inline bool IsZeroDigit(gd::String::value_type character) {
  const gd::String::value_type zero = '0';
  return character == zero;
}

/**
 * Tell whether the given character is accepted inside an identifier. Any
 * unicode character is accepted, except for:
 * `, . " () [] {} + - < > ? ^ = \ : ! / *` and whitespaces (space, line
 * break, carriage return).
 *
 * This is loosely based on what is allowed in languages like JavaScript
 * (see https://mathiasbynens.be/notes/javascript-properties), without support
 * for unicode escape syntax, and allowing all unicode ranges. The only
 * disallowed characters are the ones used for the grammar.
 */
inline bool IsAllowedInIdentifier(gd::String::value_type character) {
  // Fast path: ASCII digits and letters are always accepted.
  const bool isAsciiDigit = character >= '0' && character <= '9';
  const bool isAsciiUppercase = character >= 'A' && character <= 'Z';
  const bool isAsciiLowercase = character >= 'a' && character <= 'z';
  if (isAsciiDigit || isAsciiUppercase || isAsciiLowercase) return true;

  // Slow path: reject any character that has a meaning in the grammar.
  const bool isUsedByGrammar =
      IsParameterSeparator(character) || IsDot(character) ||
      IsQuote(character) || IsBracket(character) ||
      IsExpressionOperator(character) || IsTermOperator(character) ||
      IsWhitespace(character);

  return !isUsedByGrammar;
}

} // namespace GrammarTerminals
} // namespace gd
50 changes: 41 additions & 9 deletions Core/GDCore/Project/Project.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <vector>

#include "GDCore/CommonTools.h"
#include "GDCore/Events/Parsers/GrammarTerminals.h"
#include "GDCore/Extensions/Metadata/ExpressionMetadata.h"
#include "GDCore/Extensions/Metadata/MetadataProvider.h"
#include "GDCore/Extensions/Platform.h"
Expand Down Expand Up @@ -49,6 +50,11 @@ using namespace std;

namespace gd {

// By default, disallow unicode in identifiers, but this can be set to true
// by the IDE. In the future, this will be set to true by default, keeping backward compatibility.
// We keep it disabled by default to progressively ask users to test it in real projects.
bool Project::allowUsageOfUnicodeIdentifierNames = false;

Project::Project()
: name(_("Project")),
version("1.0.0"),
Expand Down Expand Up @@ -630,8 +636,10 @@ void Project::UnserializeFrom(const SerializerElement& element) {
SetAdaptGameResolutionAtRuntime(
propElement.GetBoolAttribute("adaptGameResolutionAtRuntime", false));
SetSizeOnStartupMode(propElement.GetStringAttribute("sizeOnStartupMode", ""));
SetAntialiasingMode(propElement.GetStringAttribute("antialiasingMode", "MSAA"));
SetAntialisingEnabledOnMobile(propElement.GetBoolAttribute("antialisingEnabledOnMobile", false));
SetAntialiasingMode(
propElement.GetStringAttribute("antialiasingMode", "MSAA"));
SetAntialisingEnabledOnMobile(
propElement.GetBoolAttribute("antialisingEnabledOnMobile", false));
SetProjectUuid(propElement.GetStringAttribute("projectUuid", ""));
SetAuthor(propElement.GetChild("author", 0, "Auteur").GetValue().GetString());
SetPackageName(propElement.GetStringAttribute("packageName"));
Expand Down Expand Up @@ -887,7 +895,8 @@ void Project::SerializeTo(SerializerElement& element) const {
adaptGameResolutionAtRuntime);
propElement.SetAttribute("sizeOnStartupMode", sizeOnStartupMode);
propElement.SetAttribute("antialiasingMode", antialiasingMode);
propElement.SetAttribute("antialisingEnabledOnMobile", isAntialisingEnabledOnMobile);
propElement.SetAttribute("antialisingEnabledOnMobile",
isAntialisingEnabledOnMobile);
propElement.SetAttribute("projectUuid", projectUuid);
propElement.SetAttribute("folderProject", folderProject);
propElement.SetAttribute("packageName", packageName);
Expand Down Expand Up @@ -993,14 +1002,28 @@ void Project::SerializeTo(SerializerElement& element) const {
externalSourceFilesElement.AddChild("sourceFile"));
}

// Store the new value of the static flag read by the identifier name checks
// (it is shared by all projects, as it is a static member).
void Project::AllowUsageOfUnicodeIdentifierNames(bool enable) {
  Project::allowUsageOfUnicodeIdentifierNames = enable;
}

// Return true if `name` can be used as an identifier: it must be non-empty,
// must not start with an ASCII digit and must only contain allowed characters.
// Allowed characters are ASCII letters, digits and underscores, or, when
// unicode identifier names are enabled, anything accepted by
// GrammarTerminals::IsAllowedInIdentifier.
bool Project::IsNameSafe(const gd::String& name) {
  if (name.empty()) return false;

  // Compare against the ASCII digit range directly instead of calling
  // `isdigit`: the characters of a gd::String can be any unicode code point,
  // and `isdigit` has undefined behavior for values that are not
  // representable as an `unsigned char`.
  const auto firstCharacter = name[0];
  if (firstCharacter >= '0' && firstCharacter <= '9') return false;

  if (!allowUsageOfUnicodeIdentifierNames) {
    const gd::String legacyAllowedCharacters =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
    return name.find_first_not_of(legacyAllowedCharacters) ==
           gd::String::npos;
  }

  // Unicode names: only the characters used by the expression grammar are
  // forbidden.
  for (auto character : name) {
    if (!GrammarTerminals::IsAllowedInIdentifier(character)) {
      return false;
    }
  }

  return true;
}

gd::String Project::GetSafeName(const gd::String& name) {
Expand All @@ -1010,12 +1033,21 @@ gd::String Project::GetSafeName(const gd::String& name) {

if (isdigit(name[0])) newName = "_" + newName;

gd::String allowedCharacters =
gd::String legacyAllowedCharacters =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";

for (size_t i = 0;i < newName.size();++i) {
// Note that iterating on the characters is not super efficient (O(n^2), which
// could be avoided with an iterator), but this function is not critical for performance
// (only used to generate a name when a user creates a new entity or rename one).
auto character = newName[i];
bool isAllowed =
allowUsageOfUnicodeIdentifierNames
? GrammarTerminals::IsAllowedInIdentifier(character)
: legacyAllowedCharacters.find(character) != gd::String::npos;

// Replace all unallowed letters by an underscore.
if (allowedCharacters.find_first_of(std::u32string(1, newName[i])) == gd::String::npos) {
if (!isAllowed) {
newName.replace(i, 1, '_');
}
}
Expand Down
20 changes: 18 additions & 2 deletions Core/GDCore/Project/Project.h
Original file line number Diff line number Diff line change
Expand Up @@ -975,10 +975,24 @@ class GD_CORE_API Project : public ObjectsContainer {

///@}

/** \name Other
/** \name Identifier names
*/
///@{

/**
 * Tell whether unicode names are currently allowed in identifier names.
 * \see IsNameSafe
 * \see GetSafeName
 */
static bool IsUsageOfUnicodeIdentifierNamesAllowed() {
  return allowUsageOfUnicodeIdentifierNames;
}

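/**
* Set whether unicode names are allowed in identifier names. This is
* disabled by default, in which case only ASCII letters, digits and
* underscores are accepted.
* \see IsNameSafe
* \see GetSafeName
*/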
static void AllowUsageOfUnicodeIdentifierNames(bool enable);

/**
* Return true if \a name is valid (can be used safely for an object,
* behavior, events function name, etc...).
Expand All @@ -989,7 +1003,7 @@ class GD_CORE_API Project : public ObjectsContainer {
* Return a name, based on the one passed in parameter, that can be safely used
* for an object, behavior, events function name, etc...
*/
static gd::String GetSafeName(const gd::String& name);
static gd::String GetSafeName(const gd::String& name);
///@}

/** \name External source files
Expand Down Expand Up @@ -1124,6 +1138,8 @@ class GD_CORE_API Project : public ObjectsContainer {
///< time the project was saved.
mutable unsigned int gdBuildVersion; ///< The GD build version used the last
///< time the project was saved.

static bool allowUsageOfUnicodeIdentifierNames;
};

} // namespace gd
Expand Down
Loading

0 comments on commit 91cc4c8

Please sign in to comment.