exercism · meatball133 · Dec 5, 2022 · Dec 5, 2022 · Dec 5, 2022 · Dec 5, 2022
diff --git a/config.json b/config.json
@@ -450,6 +450,21 @@
         "prerequisites": ["basics", "bools", "conditionals", "strings"],
         "difficulty": 1
       },
+      {
+        "slug": "micro-blog",
+        "name": "Micro Blog",
+        "uuid": "09494d98-5a80-482f-bd97-924e4817eab5",
+        "practices": ["sequences"],
+        "prerequisites": [
+          "basics",
+          "conditionals",
+          "lists",
+          "loops",
+          "numbers",
+          "strings"
+        ],
+        "difficulty": 1
+      },
       {
         "slug": "space-age",
         "name": "Space Age",

diff --git a/exercises/practice/micro-blog/.docs/instructions.md b/exercises/practice/micro-blog/.docs/instructions.md
@@ -0,0 +1,37 @@
+# Instructions
+
+You have identified a gap in the social media market for very very short posts.
+Now that Twitter allows 280-character posts, people wanting quick social media updates aren't being served.
+You decide to create your own social media network.
+
+To make your product noteworthy, you make it extreme and only allow posts of 5 or fewer characters.
+Any posts of more than 5 characters should be truncated to 5.
+
+To allow your users to express themselves fully, you allow Emoji and other Unicode.
+
+The task is to truncate input strings to 5 characters.
+
+## Text Encodings
+
+Text stored digitally has to be converted to a series of bytes.
+There are 3 common ways to map characters to bytes.
+
+- **ASCII** can encode English language characters.
+  All characters are precisely 1 byte long.
+- **UTF-8** is a Unicode text encoding.
- **UTF-8** is a Unicode text encoding.
+- **UTF-8** is a variable-length Unicode text encoding.
+  Characters take between 1 and 4 bytes.
+- **UTF-16** is a Unicode text encoding.
- **UTF-16** is a Unicode text encoding.
+- **UTF-16** is also a variable-length Unicode text encoding.
+  Characters are either 2 or 4 bytes long.
+
+UTF-8 and UTF-16 are both Unicode encodings which means they're capable of representing a massive range of characters including:
-UTF-8 and UTF-16 are both Unicode encodings which means they're capable of representing a massive range of characters including:
+UTF-8 and UTF-16 are both capable of representing a massive range of reader-perceived 'characters' or [graphemes][grapheme] including:
+
+- Text in most of the world's languages and scripts
+- Historic text
+- Emoji
- Emoji
+- Emoji
+- Symbols used in Physics and Mathematics
+
+UTF-8 and UTF-16 are both variable length encodings, which means that different characters take up different amounts of space.
-UTF-8 and UTF-16 are both variable length encodings, which means that different characters take up different amounts of space.
+UTF-8 and UTF-16 are both variable length encodings, which means that different graphemes can take up different amounts of space.
+
+Consider the letter 'a' and the emoji '😛'.
+In UTF-16 the letter takes 2 bytes but the emoji takes 4 bytes.
+
+The trick to this exercise is to use APIs designed around Unicode characters (codepoints) instead of Unicode codeunits.
-The trick to this exercise is to use APIs designed around Unicode characters (codepoints) instead of Unicode codeunits.
+The trick to this exercise is to use APIs designed around Unicode characters (codepoints) instead of Unicode codeunits.
+
+[grapheme]: https://dictionary.cambridge.org/us/dictionary/english/grapheme
diff --git a/exercises/practice/micro-blog/.meta/config.json b/exercises/practice/micro-blog/.meta/config.json
@@ -0,0 +1,19 @@
+{
+  "blurb": "Given an input string, truncate it to 5 characters.",
-  "blurb": "Given an input string, truncate it to 5 characters.",
+  "blurb": "Given a Unicode input string, truncate it to 5 grapheme clusters.",
+  "authors": [
+    "meatball133",
+    "Bethanyg"
+  ],
+  "contributors": [],
+  "files": {
+    "solution": [
+      "micro_blog.py"
+    ],
+    "test": [
+      "micro_blog_test.py"
+    ],
+    "example": [
+      ".meta/example.py"
+    ]
+  }
+}
diff --git a/exercises/practice/micro-blog/.meta/example.py b/exercises/practice/micro-blog/.meta/example.py
@@ -0,0 +1,2 @@
+def truncate(letters):
+    return letters[:5]
diff --git a/exercises/practice/micro-blog/.meta/template.j2 b/exercises/practice/micro-blog/.meta/template.j2
@@ -0,0 +1,17 @@
+{%- import "generator_macros.j2" as macros with context -%}
+{% macro test_case(case) -%}
+    {% if "→" in case["description"] %}
+    def test_german_language_long(self):
+    {% else %}
+    def test_{{case["description"] | to_snake}}(self):
+    {% endif %}
+        self.assertEqual({{ case["property"] | to_snake }}("{{case["input"]["phrase"]}}"), 
+        "{{case["expected"]}}"
+        )
+{%- endmacro %}
+{{ macros.header()}}
+
+class {{ exercise | camel_case }}Test(unittest.TestCase):
+    {% for case in cases -%}
+        {{ test_case(case) }}
+    {% endfor %}
diff --git a/exercises/practice/micro-blog/.meta/tests.toml b/exercises/practice/micro-blog/.meta/tests.toml
@@ -0,0 +1,46 @@
+# This is an auto-generated file.
+#
+# Regenerating this file via `configlet sync` will:
+# - Recreate every `description` key/value pair
+# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
+# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
+# - Preserve any other key/value pair
+#
+# As user-added comments (using the # character) will be removed when this file
+# is regenerated, comments can be added via a `comment` key.
+
+[b927b57f-7c98-42fd-8f33-fae091dc1efc]
+description = "English language short"
+
+[a3fcdc5b-0ed4-4f49-80f5-b1a293eac2a0]
+description = "English language long"
+
+[01910864-8e15-4007-9c7c-ac956c686e60]
+description = "German language short (broth)"
+
+[f263e488-aefb-478f-a671-b6ba99722543]
+description = "German language long (bear carpet → beards)"
+
+[0916e8f1-41d7-4402-a110-b08aa000342c]
+description = "Bulgarian language short (good)"
+
+[bed6b89c-03df-4154-98e6-a61a74f61b7d]
+description = "Greek language short (health)"
+
+[485a6a70-2edb-424d-b999-5529dbc8e002]
+description = "Maths short"
+
+[8b4b7b51-8f48-4fbe-964e-6e4e6438be28]
+description = "Maths long"
+
+[71f4a192-0566-4402-a512-fe12878be523]
+description = "English and emoji short"
+
+[6f0f71f3-9806-4759-a844-fa182f7bc203]
+description = "Emoji short"
+
+[ce71fb92-5214-46d0-a7f8-d5ba56b4cc6e]
+description = "Emoji long"
+
+[5dee98d2-d56e-468a-a1f2-121c3f7c5a0b]
+description = "Royal Flush?"
diff --git a/exercises/practice/micro-blog/micro_blog.py b/exercises/practice/micro-blog/micro_blog.py
@@ -0,0 +1,2 @@
+def truncate(string):
+    return string[:5]
diff --git a/exercises/practice/micro-blog/micro_blog_test.py b/exercises/practice/micro-blog/micro_blog_test.py
@@ -0,0 +1,57 @@
+import unittest
+
+from micro_blog import (
+    truncate,
+)
+
+# Tests adapted from `problem-specifications//canonical-data.json`
+
+
+class MicroBlogTest(unittest.TestCase):
+    def test_english_language_short(self):
+
+        self.assertEqual(truncate("Hi"), "Hi")
+
+    def test_english_language_long(self):
+
+        self.assertEqual(truncate("Hello there"), "Hello")
+
+    def test_german_language_short_broth(self):
+
+        self.assertEqual(truncate("brühe"), "brühe")
+
+    def test_german_language_long(self):
+
+        self.assertEqual(truncate("Bärteppich"), "Bärte")
+
+    def test_bulgarian_language_short_good(self):
+
+        self.assertEqual(truncate("Добър"), "Добър")
+
+    def test_greek_language_short_health(self):
+
+        self.assertEqual(truncate("υγειά"), "υγειά")
+
+    def test_maths_short(self):
+
+        self.assertEqual(truncate("a=πr²"), "a=πr²")
+
+    def test_maths_long(self):
+
+        self.assertEqual(truncate("∅⊊ℕ⊊ℤ⊊ℚ⊊ℝ⊊ℂ"), "∅⊊ℕ⊊ℤ")
+
+    def test_english_and_emoji_short(self):
+
+        self.assertEqual(truncate("Fly 🛫"), "Fly 🛫")
+
+    def test_emoji_short(self):
+
+        self.assertEqual(truncate("💇"), "💇")
+
+    def test_emoji_long(self):
+
+        self.assertEqual(truncate("❄🌡🤧🤒🏥🕰😀"), "❄🌡🤧🤒🏥")
+
+    def test_royal_flush(self):
+
+        self.assertEqual(truncate("🃎🂸🃅🃋🃍🃁🃊"), "🃎🂸🃅🃋🃍")