From 002353fb06a9d38afd1f2c11835d8f4745e028ac Mon Sep 17 00:00:00 2001
From: ISSOtm <eldredhabert0@gmail.com>
Date: Fri, 23 Jul 2021 01:30:59 +0200
Subject: [PATCH] [WIP] Switch to new "bit explanation" format

---
 CONTRIBUTING.md        |  41 +++++-
 Cargo.lock             |   3 +
 preproc/Cargo.toml     |   5 +-
 preproc/src/main.rs    |   2 +-
 preproc/src/preproc.rs | 301 +++++++++++++++++++++++++++++++++++------
 src/OAM.md             |  18 +--
 6 files changed, 317 insertions(+), 53 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b1eaeefa..12a438cd 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -35,7 +35,6 @@ In any case, maintainers will chime in, reviewing what you changed and if necess
 
 ## Document Style
 
-
 ### 1. Pseudocode
 
 - Assignment: :=
@@ -71,7 +70,6 @@ References:
 Discussion:
 - [#76](https://github.com/gbdev/pandocs/issues/76), [#55](https://github.com/gbdev/pandocs/issues/55)
 
-
 ### 3. 8 bits / 8-bit
 
 - "8 bits" and "8-bit" have different usages in the English language. The former is used when talking about the quantity ("a byte has 8 bits"), while the latter is used as an adjective ("8-bit bytes are nowadays standard"). "8bit" is obviously wrong, and "8 bit" is likewise incorrect.
@@ -139,6 +137,45 @@ For example, `[sample link text](<#014B — Old licensee code>)` will automatica
 References to missing sections will be left as-is, and ambiguous references resolved arbitrarily (this should eventually change).
 (Note that the use of angle brackets `<>` here is [a CommonMark feature](https://spec.commonmark.org/0.30/#link-destination) to allow spaces in the link destination.)
 
+### 9. Bit breakdown tables
+
+Quite often, a single byte's various bits encode different information.
+(For example, the "attributes" byte in OAM, all APU registers, and so on.)
+To describe those cases, we use a mix of custom syntax and a list:
+
+```markdown
+{{#bits 8
+  "Attributes"  7:"Priority" 6:"Y flip" 5:"X flip" 4:"DMG palette" 3:"Bank" 2-0:"CGB palette";
+}}
+
+- **Priority**: `0` = No, `1` = BG and Window colors 1-3 over this OBJ
+- **Y flip**: `0` = Normal, `1` = Entire OBJ is vertically mirrored
+- **X flip**: `0` = Normal, `1` = Entire OBJ is horizontally mirrored
+- **DMG palette** *\[Non CGB Mode Only\]*: `0` = OBP0, `1` = OBP1
+- **Bank** *\[CGB Mode Only\]*: `0` = Fetch tile in VRAM bank 0, `1` = Fetch tile in VRAM bank 1
+- **CGB palette** *\[CGB Mode Only\]*: Use OBP0-7
+```
+
+- The `{{#bits}}` tag can span several lines for readability, and must contain first its "width", i.e. how many bits (columns) there should be; then a list of rows, separated by semicolons `;` (a trailing one is allowed).
+
+  Each row begins with its name, which must be surrounded by double quotes (to allow whitespace in it).
+Then comes a list of *fields*, separated by whitespace; each field consists of its bit range (where e.g. `3` is equivalent to `3-3`), then a colon `:`, then its name, also surrounded by double quotes.
+
+  Field names should be succinct, otherwise the table may overflow, particularly on phones.
+
+  (Note: the tag can be escaped by putting a backslash in front of the first brace: `\{{#bits ...}}`; this makes the tag not be processed.)
+
+- The list must document all of the fields with a name.
+  Each entry must first contain the name, then any "usage notes" (typically availability, or "ignored if ..." notes) between brackets `[]`, then the read/writability between parentheses.
+  Then a colon, and a description of the field.
+
+  Regarding the formatting:
+  - The name must be in **bold**, since it's really important information.
+  - Anything before the initial colon, except for the name, must be in *italics*.
+  - Any values for the field should be put in `monospace/code blocks`; this ensures they stand out.
+  - The usage notes can be omitted if there are none.
+  - For the sake of readability, if the read/writability of all fields is the same, then it must be omitted in the list, but indicated e.g. in the section name, or in the main text.
+
 ## SVG 
 
 ### Rationale
diff --git a/Cargo.lock b/Cargo.lock
index 6d231577..1c4f29c6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -690,10 +690,13 @@ dependencies = [
 name = "pandocs-preproc"
 version = "0.1.0"
 dependencies = [
+ "anyhow",
  "clap 2.34.0",
+ "lazy_static",
  "mdbook",
  "pulldown-cmark 0.8.0",
  "pulldown-cmark-to-cmark",
+ "regex",
  "serde_json",
  "termcolor",
 ]
diff --git a/preproc/Cargo.toml b/preproc/Cargo.toml
index 824599a1..d543c915 100644
--- a/preproc/Cargo.toml
+++ b/preproc/Cargo.toml
@@ -7,10 +7,13 @@ edition = "2018"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+anyhow = "1.0.42"
 clap = "2.33.3"
+lazy_static = "1.4.0"
 # mdbook here is only used as a lib, so no need for the extra features
 mdbook = { version = "0.4.8", default-features = false }
 pulldown-cmark = "0.8.0"
 pulldown-cmark-to-cmark = "6.0.1"
-termcolor = "1.1.2"
+regex = "1.5.4"
 serde_json = "1.0.59"
+termcolor = "1.1.2"
diff --git a/preproc/src/main.rs b/preproc/src/main.rs
index f34c79ca..ca9c11d9 100644
--- a/preproc/src/main.rs
+++ b/preproc/src/main.rs
@@ -62,7 +62,7 @@ fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> {
 
 fn handle_supports(pre: &dyn Preprocessor, sub_args: &ArgMatches) -> ! {
     let renderer = sub_args.value_of("renderer").expect("Required argument");
-    let supported = pre.supports_renderer(&renderer);
+    let supported = pre.supports_renderer(renderer);
 
     // Signal whether the renderer is supported by exiting with 1 or 0.
     if supported {
diff --git a/preproc/src/preproc.rs b/preproc/src/preproc.rs
index f278a402..7117af6b 100644
--- a/preproc/src/preproc.rs
+++ b/preproc/src/preproc.rs
@@ -7,10 +7,13 @@
  * http://mozilla.org/MPL/2.0/.
  */
 
+use anyhow::{bail, Context};
+use lazy_static::lazy_static;
 use mdbook::book::{Book, BookItem, Chapter};
 use mdbook::errors::Error;
 use mdbook::preprocess::{Preprocessor, PreprocessorContext};
 use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag};
+use regex::Regex;
 use std::collections::HashMap;
 use std::io::Write;
 use std::process::{Command, Stdio};
@@ -35,24 +38,75 @@ impl Preprocessor for Pandocs {
     }
 
     fn run(&self, _: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
+        let mut sections = HashMap::new(); // Filled from every chapter before any is rewritten.
+        for item in book.iter() {
+            if let BookItem::Chapter(ref chapter) = item {
+                self.list_chapter_sections(&mut sections, chapter);
+            }
+        }
+
+        let mut res = Ok(()); // First error raised inside the closure below, if any.
+
+        book.for_each_mut(|item| {
+            macro_rules! abort_if_err { // Unwraps `Ok`; on `Err`, saves it to `res` and exits the closure.
+                ($expr:expr) => {
+                    match $expr {
+                        Err(e) => {
+                            res = Err(e);
+                            return;
+                        }
+                        Ok(ret) => ret,
+                    }
+                };
+            }
+
+            if res.is_err() { // A previous item failed: skip all the remaining ones.
+                return;
+            }
+
+            if let BookItem::Chapter(ref mut chapter) = item {
+                abort_if_err!(self.process_internal_anchor_links(chapter, &sections));
+                abort_if_err!(self.process_bit_descrs(chapter).context(format!("While processing chapter \"{}\"", chapter.name)));
+
+                if chapter.name == "Foreword" { // Append a note naming the `HEAD` commit.
+                    let commit = abort_if_err!(Commit::rev_parse("HEAD"));
+                    chapter.content.push_str(&format!(
+                        "<small>This document version was produced from git commit [`{}`](https://github.com/gbdev/pandocs/tree/{}) ({}). </small>",
+                        commit.short_hash(), commit.hash(), commit.timestamp(),
+                    ));
+                }
+            }
+        });
+
+        res.map(|_| book)
+    }
+}
+
+#[derive(Debug)]
+struct Commit {
+    hash: String, // Untrimmed stdout of `git rev-parse <rev>`.
+    short_hash: String, // Untrimmed stdout of `git rev-parse --short <rev>`.
+    timestamp: String, // Untrimmed stdout of `git show -s --format=%ci <rev>`.
+}
+
+impl Commit {
+    fn rev_parse(what: &str) -> Result<Self, Error> {
         let output = Command::new("git")
-            .args(&["rev-parse", "HEAD"])
+            .args(["rev-parse", what])
             .stderr(Stdio::inherit())
             .stdin(Stdio::null())
             .output()
             .expect("Failed to get commit hash");
         if !output.status.success() {
             return Err(Error::msg(format!(
-                "Git exited with status {} while getting commit hash",
+                "Git exited with {} while getting commit hash",
                 output.status
             )));
         }
-        let commit_hash = str::from_utf8(&output.stdout)
-            .expect("Commit hash is not valid UTF-8??")
-            .trim();
+        let hash = String::from_utf8(output.stdout).expect("Commit hash is not valid UTF-8??");
 
         let output = Command::new("git")
-            .args(&["rev-parse", "--short", "HEAD"])
+            .args(["rev-parse", "--short", what])
             .stderr(Stdio::inherit())
             .stdin(Stdio::null())
             .output()
@@ -63,12 +117,11 @@ impl Preprocessor for Pandocs {
                 output.status
             )));
         }
-        let commit_short_hash = str::from_utf8(&output.stdout)
-            .expect("Commit hash is not valid UTF-8??")
-            .trim();
+        let short_hash =
+            String::from_utf8(output.stdout).expect("Commit hash is not valid UTF-8??");
 
         let output = Command::new("git")
-            .args(&["show", "-s", "--format=%ci", "HEAD"])
+            .args(["show", "-s", "--format=%ci", what])
             .stderr(Stdio::inherit())
             .stdin(Stdio::null())
             .output()
@@ -79,36 +132,25 @@ impl Preprocessor for Pandocs {
                 output.status
             )));
         }
-        let commit_timestamp = str::from_utf8(&output.stdout)
-            .expect("Commit hash is not valid UTF-8??")
-            .trim();
+        let timestamp = String::from_utf8(output.stdout).expect("Commit hash is not valid UTF-8??");
 
-        let mut sections = HashMap::new();
-        for item in book.iter() {
-            if let BookItem::Chapter(ref chapter) = item {
-                self.list_chapter_sections(&mut sections, &chapter);
-            }
-        }
-
-        let mut res = Ok(());
-
-        book.for_each_mut(|item| {
-            if res.is_err() {
-                return;
-            }
+        Ok(Self {
+            hash,
+            short_hash,
+            timestamp,
+        })
+    }
 
-            if let BookItem::Chapter(ref mut chapter) = item {
-                if let Err(e) = self.process_chapter(chapter, &sections) {
-                    res = Err(e);
-                }
+    fn hash(&self) -> &str {
+        self.hash.trim()
+    }
 
-                if chapter.name == "Foreword" {
-                    chapter.content.push_str(&format!("<small>This document version was produced from git commit [`{}`](https://github.com/gbdev/pandocs/tree/{}) ({}). </small>", commit_short_hash, commit_hash,commit_timestamp));
-                }
-            }
-        });
+    fn short_hash(&self) -> &str {
+        self.short_hash.trim()
+    }
 
-        res.map(|_| book)
+    fn timestamp(&self) -> &str {
+        self.timestamp.trim()
     }
 }
 
@@ -157,16 +199,14 @@ impl Pandocs {
         }
     }
 
-    fn process_chapter(
+    fn process_internal_anchor_links(
         &self,
         chapter: &mut Chapter,
         sections: &HashMap<String, (String, bool)>,
     ) -> Result<(), Error> {
         let mut buf = String::with_capacity(chapter.content.len());
-        let extensions = Options::ENABLE_TABLES
-            | Options::ENABLE_FOOTNOTES
-            | Options::ENABLE_STRIKETHROUGH
-            | Options::ENABLE_SMART_PUNCTUATION;
+        let extensions =
+            Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES | Options::ENABLE_STRIKETHROUGH;
 
         let events = Parser::new_ext(&chapter.content, extensions).map(|event| match event {
             Event::Start(Tag::Link(link_type, url, title)) if url.starts_with('#') => {
@@ -275,3 +315,182 @@ fn id_from_name(name: &str) -> String {
         })
         .collect::<String>()
 }
+
+impl Pandocs {
+    /// Replaces each `{{#bits ...}}` tag found in the chapter by an HTML table, with one
+    /// header column per bit (highest first) and one row per row of the description.
+    fn process_bit_descrs(&self, chapter: &mut Chapter) -> Result<(), Error> {
+        // Build the output in a fresh string, copying the text in between the tags as-is.
+        let mut previous_end_index = 0;
+        let mut replaced = String::with_capacity(chapter.content.len());
+
+        for result in find_bit_descrs(&chapter.content) {
+            let (start, end, attrs) = result?;
+
+            replaced.push_str(&chapter.content[previous_end_index..start]);
+            replaced.push_str("<table><thead><tr><th></th>");
+            for i in (0..attrs.width).rev() {
+                replaced.push_str(&format!("<th>{}</th>", i));
+            }
+            replaced.push_str("</tr></thead><tbody>");
+
+            for (name, row) in &attrs.rows {
+                replaced.push_str(&format!("<tr><td><strong>{}</strong></td>", name));
+                let mut pos = attrs.width; // One past the highest bit not yet emitted.
+                let mut fields = row.iter().peekable();
+                while pos != 0 {
+                    let (start, unused, name) = match fields.peek() {
+                        // If the next field's highest bit is right below `pos`, emit that field
+                        Some(field) if field.end == pos - 1 => (field.start, false, field.name),
+                        // Else, emit an unused cell ending at the next field (bit 0 if none)
+                        res => (res.map_or(0, |field| field.end + 1), true, ""),
+                    };
+                    replaced.push_str(&format!(
+                        "<td colspan=\"{}\"{}>{}</td>",
+                        pos - start,
+                        if unused {
+                            " class=\"unused-field\""
+                        } else {
+                            ""
+                        },
+                        name
+                    ));
+
+                    if !unused {
+                        fields.next();
+                    }
+                    pos = start;
+                }
+                replaced.push_str("</tr>");
+            }
+            replaced.push_str("</tbody></table>");
+
+            previous_end_index = end;
+        }
+
+        replaced.push_str(&chapter.content[previous_end_index..]);
+
+        chapter.content = replaced;
+        Ok(())
+    }
+}
+
+/// Yields `(start, end, attrs)` for each unescaped `{{#bits ...}}` tag in `contents`,
+/// where `start..end` is the tag's byte range; a tag that fails to parse yields an `Err`.
+fn find_bit_descrs(
+    contents: &str,
+) -> impl Iterator<Item = Result<(usize, usize, BitDescrAttrs<'_>), Error>> {
+    lazy_static! {
+        static ref RE: Regex = Regex::new(
+            r"(?x)             # Allow comments in the regex
+            \\\{\{\#.*\}\}     # Escaped tag (will be ignored)
+            |                  # ...or...
+            \{\{\s*\#bits\s+   # tag opening braces, whitespace, type, and separating whitespace
+            ([^}]+)            # tag contents
+            \}\}               # closing braces"
+        )
+        .unwrap();
+    }
+    RE.captures_iter(contents).filter_map(|caps| {
+        // `Captures::len()` counts groups even when unmatched, so test group 1 itself:
+        // it only participates for real tags, never for the escaped alternative.
+        // Must use `.get()`, as indexing ties the returned value's lifetime to `caps`'s.
+        let tag_contents = caps.get(1)?.as_str();
+        let all = caps.get(0).unwrap(); // There is always a 0th capture.
+        Some(BitDescrAttrs::from_str(tag_contents).map(|attrs| (all.start(), all.end(), attrs)))
+    })
+}
+
+#[derive(Debug)]
+struct BitDescrAttrs<'input> {
+    width: usize, // How many bits (columns) the table has.
+    rows: Vec<(&'input str, Vec<BitDescrField<'input>>)>, // Each row's name, then its fields.
+}
+
+impl<'input> BitDescrAttrs<'input> {
+    /// Parses the inside of a `{{#bits ...}}` tag: a width, then `;`-separated rows, each a
+    /// double-quoted name followed by fields written `high-low:"name"` (or `bit:"name"`),
+    /// ordered from the highest bit down.
+    ///
+    /// Errors if the width, a row name or a field is malformed, if a field lies outside
+    /// the width, or if fields overlap or are not in descending order.
+    fn from_str(contents: &'input str) -> Result<Self, Error> {
+        // First, parse the width.
+        let contents = contents.trim();
+        let width_len = contents
+            .find(|c: char| c.is_ascii_whitespace())
+            .ok_or_else(|| Error::msg("{{#bits}} descriptions must describe at least one thing"))?;
+        let width_str = &contents[..width_len];
+        let width: usize = width_str.parse().with_context(|| {
+            format!("Expected bits description to start with width, got \"{}\"", width_str)
+        })?;
+        let s = contents[width_len..].trim_start();
+
+        // Next, parse the rows!
+        let mut rows = Vec::new();
+        for row_str in s.split_terminator(';') {
+            let row_str = row_str.trim();
+            let Some((name, rest)) = row_str.strip_prefix('"').and_then(|r| r.split_once('"'))
+            else {
+                bail!("Expected row to begin by its name (did you forget to put quotes around it?)");
+            };
+            let mut row_str = rest.trim_start(); // The end is already trimmed.
+
+            // Then, the fields!
+            let mut fields: Vec<BitDescrField> = Vec::new();
+            while !row_str.is_empty() {
+                lazy_static! {
+                    // Only straight double quotes `"` are accepted around a field's name.
+                    static ref RE: Regex =
+                        Regex::new(r#"^(\d+)(?:\s*-\s*(\d+))?\s*:\s*"([^"]*)""#).unwrap();
+                }
+
+                let Some(cap) = RE.captures(row_str) else {
+                    bail!("Failed to parse field for \"{}\"", row_str);
+                };
+                // May overflow (or be non-ASCII digits): report it instead of panicking.
+                let parse_bit = |digits: &str| {
+                    digits.parse::<usize>()
+                        .with_context(|| format!("Invalid bit number \"{}\"", digits))
+                };
+                let end = parse_bit(&cap[1])?;
+                let start = cap.get(2).map_or(Ok(end), |m| parse_bit(m.as_str()))?;
+                let name = cap.get(3).unwrap().as_str(); // Group 3 is not optional.
+
+                // Perform sanity checks.
+                if start > end {
+                    bail!("Field must end after it started (expected {} <= {})", start, end);
+                }
+                if end >= width {
+                    bail!("Field must fit within the width (expected {} < {})", end, width);
+                }
+                // Fields go from the highest bits down, so this one must lie entirely below
+                // the previous one; the table renderer relies on this.
+                if let Some(field) = fields.last() {
+                    if field.start <= end {
+                        bail!(
+                            "Field must start after previous ended (expected {} > {})",
+                            field.start,
+                            end,
+                        );
+                    }
+                }
+
+                fields.push(BitDescrField { start, end, name });
+                // Advance by the match's length, plus any whitespace after it.
+                row_str = row_str[cap[0].len()..].trim_start();
+            }
+
+            rows.push((name, fields));
+        }
+
+        Ok(BitDescrAttrs { width, rows })
+    }
+}
+
+#[derive(Debug)]
+struct BitDescrField<'a> {
+    start: usize, // Lowest bit covered by the field (inclusive).
+    end: usize, // Highest bit covered by the field (inclusive).
+    name: &'a str, // Text shown in the field's table cell.
+}
diff --git a/src/OAM.md b/src/OAM.md
index a0d40c9d..4c39a537 100644
--- a/src/OAM.md
+++ b/src/OAM.md
@@ -49,14 +49,16 @@ tile is "NN & \$FE", and the bottom 8×8 tile is "NN | \$01".
 
 ## Byte 3 — Attributes/Flags
 
-```
- Bit7   BG and Window over OBJ (0=No, 1=BG and Window colors 1-3 over the OBJ)
- Bit6   Y flip          (0=Normal, 1=Vertically mirrored)
- Bit5   X flip          (0=Normal, 1=Horizontally mirrored)
- Bit4   Palette number  **Non CGB Mode Only** (0=OBP0, 1=OBP1)
- Bit3   Tile VRAM-Bank  **CGB Mode Only**     (0=Bank 0, 1=Bank 1)
- Bit2-0 Palette number  **CGB Mode Only**     (OBP0-7)
-```
+{{#bits 8
+  "Attributes"  7:"Priority" 6:"Y flip" 5:"X flip" 4:"DMG palette" 3:"Bank" 2-0:"CGB palette";
+}}
+
+- **Priority**: `0` = No, `1` = BG and Window colors 1-3 over this OBJ
+- **Y flip**: `0` = Normal, `1` = Entire OBJ is vertically mirrored
+- **X flip**: `0` = Normal, `1` = Entire OBJ is horizontally mirrored
+- **DMG palette** *\[Non CGB Mode Only\]*: `0` = OBP0, `1` = OBP1
+- **Bank** *\[CGB Mode Only\]*: `0` = Fetch tile in VRAM bank 0, `1` = Fetch tile in VRAM bank 1
+- **CGB palette** *\[CGB Mode Only\]*: Use OBP0-7
 
 ## Writing data to OAM