Skip to content

Commit

Permalink
Merge pull request #46 from OpenRarity/strip_token_attributes
Browse files Browse the repository at this point in the history
Strip whitespace for attribute names and string attribute values
  • Loading branch information
vickygos authored Sep 14, 2022
2 parents eeb51ba + e5a3fd0 commit 7156bf1
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 9 deletions.
10 changes: 6 additions & 4 deletions open_rarity/models/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
StringAttribute,
)
from open_rarity.models.token_standard import TokenStandard
from open_rarity.models.utils.attribute_utils import normalize_attribute_string


@dataclass
Expand Down Expand Up @@ -117,8 +118,9 @@ def _normalize_attributes_frequency_counts(
AttributeName, dict[AttributeValue, int]
],
) -> dict[AttributeName, dict[AttributeValue, int]]:
"""We normalize all collection attributes by ensuring that upper/lower
casing doesn't produce different attributes (e.g. 'Hat' == 'hat').
"""We normalize all collection attributes to ensure that neither casing nor
leading/trailing spaces produce different attributes:
(e.g. 'Hat' == 'hat' == 'hat ')
If a collection has the following in their attributes frequency counts:
('Hat', 'beanie') 5 tokens and
('hat', 'beanie') 10 tokens
Expand All @@ -129,12 +131,12 @@ def _normalize_attributes_frequency_counts(
attr_name,
attr_value_to_count,
) in attributes_frequency_counts.items():
normalized_name = attr_name.lower()
normalized_name = normalize_attribute_string(attr_name)
if normalized_name not in normalized:
normalized[normalized_name] = {}
for attr_value, attr_count in attr_value_to_count.items():
normalized_value = (
attr_value.lower()
normalize_attribute_string(attr_value)
if isinstance(attr_value, str)
else attr_value
)
Expand Down
43 changes: 43 additions & 0 deletions open_rarity/models/token.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from open_rarity.models.token_identifier import TokenIdentifier
from open_rarity.models.token_metadata import TokenMetadata
from open_rarity.models.token_standard import TokenStandard
from open_rarity.models.utils.attribute_utils import normalize_attribute_string


@dataclass
Expand All @@ -25,5 +26,47 @@ class Token:
token_standard: TokenStandard
metadata: TokenMetadata

def __post_init__(self):
    # Dataclass hook: runs right after the generated __init__, replacing
    # self.metadata with a normalized copy so that all Token instances
    # compare consistently regardless of attribute-name casing/whitespace.
    self.metadata = self._normalize_metadata(self.metadata)

def _normalize_metadata(self, metadata: TokenMetadata) -> TokenMetadata:
    """Return token metadata with normalized attribute names.

    Both the attribute-dict keys and the ``name`` field stored inside each
    attribute object are lower-cased and whitespace-stripped so equality
    checks behave consistently.

    Parameters
    ----------
    metadata : TokenMetadata
        The original token metadata.

    Returns
    -------
    TokenMetadata
        A new TokenMetadata built from re-keyed attribute dicts.
        NOTE(review): the attribute objects themselves are re-used, and
        mutated in place when their ``name`` differs from the normalized
        key — the input metadata's attributes are not copied.
    """

    def normalize_and_reset(attributes_dict: dict):
        # Re-key the dict on the normalized attribute name and keep each
        # attribute object's own ``name`` field in sync with its new key.
        rekeyed = {}
        for raw_name, attribute in attributes_dict.items():
            canonical_name = normalize_attribute_string(raw_name)
            if attribute.name != canonical_name:
                attribute.name = canonical_name
            rekeyed[canonical_name] = attribute
        return rekeyed

    return TokenMetadata(
        string_attributes=normalize_and_reset(metadata.string_attributes),
        numeric_attributes=normalize_and_reset(metadata.numeric_attributes),
        date_attributes=normalize_and_reset(metadata.date_attributes),
    )

def __str__(self):
    # Human-readable form used in logs/debugging: "Token[<identifier>]".
    # str.format with an empty spec is exactly equivalent to the f-string.
    return "Token[{}]".format(self.token_identifier)
9 changes: 6 additions & 3 deletions open_rarity/models/token_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from dataclasses import dataclass, field

from open_rarity.models.utils.attribute_utils import normalize_attribute_string

AttributeName = str
AttributeValue = str

Expand All @@ -20,9 +22,10 @@ class StringAttribute:
value: AttributeValue

def __init__(self, name: AttributeName, value: AttributeValue):
    """Construct a StringAttribute with a normalized name and value.

    Parameters
    ----------
    name : AttributeName
        Raw attribute name; coerced to str, then lower-cased and stripped.
    value : AttributeValue
        Raw attribute value; coerced to str, then lower-cased and stripped.
    """
    # We treat string attribute names and values the same regardless of
    # casing or leading/trailing whitespace. The str() coercion preserves
    # the previous behavior (str(name).lower()) of accepting non-string
    # inputs, which normalize_attribute_string alone would reject.
    self.name = normalize_attribute_string(str(name))
    self.value = normalize_attribute_string(str(value))


@dataclass
Expand Down
17 changes: 17 additions & 0 deletions open_rarity/models/utils/attribute_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def normalize_attribute_string(value: str) -> str:
    """Normalize an attribute name or a string attribute value.

    Ensures consistent normalization everywhere by lower-casing and
    stripping leading/trailing whitespace, so e.g.
    'Hat' == 'hat' == ' hat '.

    Parameters
    ----------
    value : str
        The string to normalize (either an attribute name or a string
        attribute value). Non-str inputs are coerced with str() for
        robustness, matching the ``str(name).lower()`` behavior of the
        call sites this helper replaces.

    Returns
    -------
    str
        The normalized (lower-cased, stripped) string.
    """
    return str(value).lower().strip()
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
name = "open-rarity"
version = "0.4.0-beta"
description = "Open-Rarity library is an open standard that provides an easy , explanable and reproducible computation for NFT rarity"
version = "0.4.1-beta"
description = "Open-Rarity library is an open standard that provides an easy, explainable and reproducible computation for NFT rarity"
authors = ["Dan Meshkov <[email protected]>", "Vicky Gong <[email protected]>"]
license = "Apache-2.0"

Expand Down
21 changes: 21 additions & 0 deletions tests/models/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,27 @@ def test_attribute_frequency_counts_initialization(self):
},
all_lower_case_attributes,
],
# Trailing or leading whitespaces
[
{
" hat": {"beanie": 40, "cap": 25},
"Hat ": {"Cap": 35},
"bottom": {"special": 1},
},
all_lower_case_attributes,
],
# Middle whitespace
[
{
"hat": {
"big beanie": 40,
"cap": 25,
"big beanie ": 10,
"beanie": 5,
},
},
{"hat": {"big beanie": 50, "cap": 25, "beanie": 5}},
],
# Empty
[{}, {}],
]
Expand Down
115 changes: 115 additions & 0 deletions tests/models/test_token.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from open_rarity.models.token_metadata import (
NumericAttribute,
StringAttribute,
TokenMetadata,
)

from tests.helpers import create_evm_token


class TestToken:
    """Tests that Token construction normalizes attribute metadata.

    NOTE(review): relies on the project helper ``create_evm_token`` and the
    TokenMetadata/StringAttribute/NumericAttribute models; assumes
    Token.__post_init__ rewrites metadata via _normalize_metadata.
    """

    def test_token_init_metadata_non_matching_attribute_names(self):
        # Dict keys and each attribute's own `name` field deliberately
        # disagree; after init both must be normalized, with the (normalized)
        # dict key winning as the attribute name.
        token = create_evm_token(
            token_id=1,
            metadata=TokenMetadata(
                string_attributes={
                    "hat": StringAttribute(name="big hat", value="blue"),
                    "shirt": StringAttribute(name=" shirt", value="red"),
                }
            ),
        )
        assert token.metadata.string_attributes == {
            "hat": StringAttribute(name="hat", value="blue"),
            "shirt": StringAttribute(name="shirt", value="red"),
        }

    def test_token_attribute_normalization(self):
        # All of these tokens differ only in casing and leading/trailing
        # whitespace of attribute names, so their normalized metadata must
        # compare equal.
        expected_equal_metadata_tokens = [
            create_evm_token(
                token_id=1,
                metadata=TokenMetadata(
                    string_attributes={
                        "hat ": StringAttribute(name="hat", value="blue"),
                        "Shirt ": StringAttribute(name="shirt", value="red"),
                    },
                    numeric_attributes={
                        "level": NumericAttribute(name="level", value=1),
                    },
                ),
            ),
            create_evm_token(
                token_id=1,
                metadata=TokenMetadata(
                    string_attributes={
                        "hat": StringAttribute(name="hat", value="blue"),
                        "Shirt ": StringAttribute(name=" shirt", value="red"),
                    },
                    numeric_attributes={
                        "Level": NumericAttribute(name="level", value=1),
                    },
                ),
            ),
            create_evm_token(
                token_id=1,
                metadata=TokenMetadata(
                    string_attributes={
                        "Hat": StringAttribute(name=" hat ", value="blue"),
                        "shirt": StringAttribute(name="shirt", value="red"),
                    },
                    numeric_attributes={
                        "Level": NumericAttribute(name=" level ", value=1),
                    },
                ),
            ),
            create_evm_token(
                token_id=1,
                metadata=TokenMetadata(
                    string_attributes={
                        " hat ": StringAttribute(name=" hat ", value="blue"),
                        " shirt": StringAttribute(name="shirt", value="red"),
                    },
                    numeric_attributes={
                        "level": NumericAttribute(name="level ", value=1),
                    },
                ),
            ),
        ]

        assert all(
            t.metadata == expected_equal_metadata_tokens[0].metadata
            for t in expected_equal_metadata_tokens
        )

        # These differ by actual attribute-name content (inner words), not
        # just casing/whitespace, so they must NOT compare equal.
        expected_not_equal = [
            create_evm_token(
                token_id=1,
                metadata=TokenMetadata(
                    string_attributes={
                        " big hat ": StringAttribute(
                            name=" hat ", value="blue"
                        ),
                        " shirt": StringAttribute(name="shirt", value="red"),
                    },
                    numeric_attributes={
                        "level": NumericAttribute(name="level", value=1),
                    },
                ),
            ),
            create_evm_token(
                token_id=1,
                metadata=TokenMetadata(
                    string_attributes={
                        "hat": StringAttribute(name="hat", value="blue"),
                        "shirt": StringAttribute(name="shirt", value="red"),
                    },
                    numeric_attributes={
                        "big level": NumericAttribute(name="level", value=1),
                    },
                ),
            ),
        ]

        assert all(
            t.metadata != expected_equal_metadata_tokens[0].metadata
            for t in expected_not_equal
        )

0 comments on commit 7156bf1

Please sign in to comment.