From 002353fb06a9d38afd1f2c11835d8f4745e028ac Mon Sep 17 00:00:00 2001 From: ISSOtm Date: Fri, 23 Jul 2021 01:30:59 +0200 Subject: [PATCH] [WIP] Switch to new "bit explanation" format --- CONTRIBUTING.md | 41 +++++- Cargo.lock | 3 + preproc/Cargo.toml | 5 +- preproc/src/main.rs | 2 +- preproc/src/preproc.rs | 301 +++++++++++++++++++++++++++++++++++------ src/OAM.md | 18 +-- 6 files changed, 317 insertions(+), 53 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b1eaeefa..12a438cd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -35,7 +35,6 @@ In any case, maintainers will chime in, reviewing what you changed and if necess ## Document Style - ### 1. Pseudocode - Assignment: := @@ -71,7 +70,6 @@ References: Discussion: - [#76](https://github.com/gbdev/pandocs/issues/76), [#55](https://github.com/gbdev/pandocs/issues/55) - ### 3. 8 bits / 8-bit - "8 bits" and "8-bit" have different usages in the English language. The former is used when talking about the quantity ("a byte has 8 bits"), while the latter is used as an adjective ("8-bit bytes are nowadays standard"). "8bit" is obviously wrong, and "8 bit" is likewise incorrect. @@ -139,6 +137,45 @@ For example, `[sample link text](<#014B — Old licensee code>)` will automatica References to missing sections will be left as-is, and ambiguous references resolved arbitrarily (this should eventually change). (Note that the use of angle brackets `<>` here is [a CommonMark feature](https://spec.commonmark.org/0.30/#link-destination) to allow spaces in the link destination.) +### 9. Bit breakdown tables + +Quite often, a single byte's various bits encode different information. +(For example, the "attributes" byte in OAM, all APU registers, and so on.) 
+To describe those cases, we use a mix of custom syntax and a list: + +```markdown +{{#bits 8 + "Attributes" 7:"Priority" 6:"Y flip" 5:"X flip" 4:"DMG palette" 3:"Bank" 2-0:"CGB palette"; +}} + +- **Priority**: `0` = No, `1` = BG and Window colors 1-3 over this OBJ +- **Y flip**: `0` = Normal, `1` = Entire OBJ is vertically mirrored +- **X flip**: `0` = Normal, `1` = Entire OBJ is horizontally mirrored +- **DMG palette** *\[Non CGB Mode only\]*: `0` = OBP0, `1` = OBP1 +- **Bank** *\[CGB Mode Only\]*: `0` = Fetch tile in VRAM bank 0, `1` = Fetch tile in VRAM bank 1 +- **CGB palette** *\[CGB Mode Only\]*: Use OBP0-7 +``` + +- The `{{#bits}}` tag can span several lines for readability, and must contain first its "width", i.e. how many bits (columns) there should be; then a list of rows, separated by semicolons `;` (a trailing one is allowed). + + Each row begins with its name, which must be surrounded by double quotes (to allow whitespace in it). +Then, there's a list of *fields*, separated by whitespace: first its bit range (where e.g. `3` is equivalent to `3-3`), then its name, also surrounded by double quotes. + + Field names should be succinct, otherwise the table may overflow, particularly on phones. + + (Note: the tag can be escaped by putting a backslash in front of the first brace: `\{{#bits ...}}`; this makes the tag not be processed.) + +- The list must document all of the fields with a name. + Each entry must first contain the name, then any "usage notes" (typically availability, or "ignored if ..." notes) between brackets `[]`, then the read/writability between parentheses. + Then a colon, and a description of the field. + + Regarding the formatting: + - The name must be in **bold**, since it's really important information. + - Anything before the initial colon, except for the name, must be in *italics*. + - Any values for the field should be put in `monospace/code blocks`; this ensures they stand out. + - The usage notes can be omitted if there are none. 
+ - For the sake of readability, if the read/writability of all fields is the same, then it must be omitted in the list, but indicated e.g. in the section name, or in main text. + ## SVG ### Rationale diff --git a/Cargo.lock b/Cargo.lock index 6d231577..1c4f29c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -690,10 +690,13 @@ dependencies = [ name = "pandocs-preproc" version = "0.1.0" dependencies = [ + "anyhow", "clap 2.34.0", + "lazy_static", "mdbook", "pulldown-cmark 0.8.0", "pulldown-cmark-to-cmark", + "regex", "serde_json", "termcolor", ] diff --git a/preproc/Cargo.toml b/preproc/Cargo.toml index 824599a1..d543c915 100644 --- a/preproc/Cargo.toml +++ b/preproc/Cargo.toml @@ -7,10 +7,13 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = "1.0.42" clap = "2.33.3" +lazy_static = "1.4.0" # mdbook here is only used as a lib, so no need for the extra features mdbook = { version = "0.4.8", default-features = false } pulldown-cmark = "0.8.0" pulldown-cmark-to-cmark = "6.0.1" -termcolor = "1.1.2" +regex = "1.5.4" serde_json = "1.0.59" +termcolor = "1.1.2" diff --git a/preproc/src/main.rs b/preproc/src/main.rs index f34c79ca..ca9c11d9 100644 --- a/preproc/src/main.rs +++ b/preproc/src/main.rs @@ -62,7 +62,7 @@ fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> { fn handle_supports(pre: &dyn Preprocessor, sub_args: &ArgMatches) -> ! { let renderer = sub_args.value_of("renderer").expect("Required argument"); - let supported = pre.supports_renderer(&renderer); + let supported = pre.supports_renderer(renderer); // Signal whether the renderer is supported by exiting with 1 or 0. if supported { diff --git a/preproc/src/preproc.rs b/preproc/src/preproc.rs index f278a402..7117af6b 100644 --- a/preproc/src/preproc.rs +++ b/preproc/src/preproc.rs @@ -7,10 +7,13 @@ * http://mozilla.org/MPL/2.0/. 
*/ +use anyhow::{bail, Context}; +use lazy_static::lazy_static; use mdbook::book::{Book, BookItem, Chapter}; use mdbook::errors::Error; use mdbook::preprocess::{Preprocessor, PreprocessorContext}; use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag}; +use regex::Regex; use std::collections::HashMap; use std::io::Write; use std::process::{Command, Stdio}; @@ -35,24 +38,75 @@ impl Preprocessor for Pandocs { } fn run(&self, _: &PreprocessorContext, mut book: Book) -> Result { + let mut sections = HashMap::new(); + for item in book.iter() { + if let BookItem::Chapter(ref chapter) = item { + self.list_chapter_sections(&mut sections, chapter); + } + } + + let mut res = Ok(()); + + book.for_each_mut(|item| { + macro_rules! abort_if_err { + ($expr:expr) => { + match $expr { + Err(e) => { + res = Err(e); + return; + } + Ok(ret) => ret, + } + }; + } + + if res.is_err() { + return; + } + + if let BookItem::Chapter(ref mut chapter) = item { + abort_if_err!(self.process_internal_anchor_links(chapter, §ions)); + abort_if_err!(self.process_bit_descrs(chapter).context(format!("While processing chapter \"{}\"", chapter.name))); + + if chapter.name == "Foreword" { + let commit = abort_if_err!(Commit::rev_parse("HEAD")); + chapter.content.push_str(&format!( + "This document version was produced from git commit [`{}`](https://github.com/gbdev/pandocs/tree/{}) ({}). 
", + commit.short_hash(), commit.hash(), commit.timestamp(), + )); + } + } + }); + + res.map(|_| book) + } +} + +#[derive(Debug)] +struct Commit { + hash: String, + short_hash: String, + timestamp: String, +} + +impl Commit { + fn rev_parse(what: &str) -> Result { let output = Command::new("git") - .args(&["rev-parse", "HEAD"]) + .args(["rev-parse", what]) .stderr(Stdio::inherit()) .stdin(Stdio::null()) .output() .expect("Failed to get commit hash"); if !output.status.success() { return Err(Error::msg(format!( - "Git exited with status {} while getting commit hash", + "Git exited with {} while getting commit hash", output.status ))); } - let commit_hash = str::from_utf8(&output.stdout) - .expect("Commit hash is not valid UTF-8??") - .trim(); + let hash = String::from_utf8(output.stdout).expect("Commit hash is not valid UTF-8??"); let output = Command::new("git") - .args(&["rev-parse", "--short", "HEAD"]) + .args(["rev-parse", "--short", what]) .stderr(Stdio::inherit()) .stdin(Stdio::null()) .output() @@ -63,12 +117,11 @@ impl Preprocessor for Pandocs { output.status ))); } - let commit_short_hash = str::from_utf8(&output.stdout) - .expect("Commit hash is not valid UTF-8??") - .trim(); + let short_hash = + String::from_utf8(output.stdout).expect("Commit hash is not valid UTF-8??"); let output = Command::new("git") - .args(&["show", "-s", "--format=%ci", "HEAD"]) + .args(["show", "-s", "--format=%ci", what]) .stderr(Stdio::inherit()) .stdin(Stdio::null()) .output() @@ -79,36 +132,25 @@ impl Preprocessor for Pandocs { output.status ))); } - let commit_timestamp = str::from_utf8(&output.stdout) - .expect("Commit hash is not valid UTF-8??") - .trim(); + let timestamp = String::from_utf8(output.stdout).expect("Commit hash is not valid UTF-8??"); - let mut sections = HashMap::new(); - for item in book.iter() { - if let BookItem::Chapter(ref chapter) = item { - self.list_chapter_sections(&mut sections, &chapter); - } - } - - let mut res = Ok(()); - - 
book.for_each_mut(|item| { - if res.is_err() { - return; - } + Ok(Self { + hash, + short_hash, + timestamp, + }) + } - if let BookItem::Chapter(ref mut chapter) = item { - if let Err(e) = self.process_chapter(chapter, §ions) { - res = Err(e); - } + fn hash(&self) -> &str { + self.hash.trim() + } - if chapter.name == "Foreword" { - chapter.content.push_str(&format!("This document version was produced from git commit [`{}`](https://github.com/gbdev/pandocs/tree/{}) ({}). ", commit_short_hash, commit_hash,commit_timestamp)); - } - } - }); + fn short_hash(&self) -> &str { + self.short_hash.trim() + } - res.map(|_| book) + fn timestamp(&self) -> &str { + self.timestamp.trim() } } @@ -157,16 +199,14 @@ impl Pandocs { } } - fn process_chapter( + fn process_internal_anchor_links( &self, chapter: &mut Chapter, sections: &HashMap, ) -> Result<(), Error> { let mut buf = String::with_capacity(chapter.content.len()); - let extensions = Options::ENABLE_TABLES - | Options::ENABLE_FOOTNOTES - | Options::ENABLE_STRIKETHROUGH - | Options::ENABLE_SMART_PUNCTUATION; + let extensions = + Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES | Options::ENABLE_STRIKETHROUGH; let events = Parser::new_ext(&chapter.content, extensions).map(|event| match event { Event::Start(Tag::Link(link_type, url, title)) if url.starts_with('#') => { @@ -275,3 +315,182 @@ fn id_from_name(name: &str) -> String { }) .collect::() } + +impl Pandocs { + fn process_bit_descrs(&self, chapter: &mut Chapter) -> Result<(), Error> { + // When replacing one thing in a string by something with a different length, + // the indices after that will not correspond, + // we therefore have to store the difference to correct this + let mut previous_end_index = 0; + let mut replaced = String::with_capacity(chapter.content.len()); + + for result in find_bit_descrs(&chapter.content) { + let (start, end, attrs) = result?; + + replaced.push_str(&chapter.content[previous_end_index..start]); + replaced.push_str(""); + for i in 
(0..attrs.width).rev() { + replaced.push_str(&format!("", i)); + } + replaced.push_str(""); + + for (name, row) in &attrs.rows { + replaced.push_str(&format!("", name)); + let mut pos = attrs.width; + let mut fields = row.iter().peekable(); + while pos != 0 { + let (start, unused, name) = match fields.peek() { + // If we are at the edge of a "used" field, use it + Some(field) if field.end == pos - 1 => (field.start, false, field.name), + // If in an unused field, end at the next field, or the width if none such + res => (res.map_or(0, |field| field.end + 1), true, ""), + }; + replaced.push_str(&format!( + "", + pos - start, + if unused { + " class=\"unused-field\"" + } else { + "" + }, + name + )); + + if !unused { + fields.next(); + } + pos = start; + } + replaced.push_str(""); + } + replaced.push_str("
{}
{}{}
"); + + previous_end_index = end; + } + + replaced.push_str(&chapter.content[previous_end_index..]); + + chapter.content = replaced; + Ok(()) + } +} + +fn find_bit_descrs( + contents: &str, +) -> impl Iterator), Error>> { + lazy_static! { + static ref RE: Regex = Regex::new( + r"(?x) # Allow comments in the regex + \\\{\{\#.*\}\} # Escaped tag (will be ignored) + | # ...or... + \{\{\s*\#bits\s+ # tag opening braces, whitespace, type, and separating whitespace + ([^}]+) # tag contents + \}\} # closing braces" + ) + .unwrap(); + } + RE.captures_iter(contents) + .filter(|caps| caps.len() != 1) + .map(|caps| { + // Must use `.get()`, as indexing ties the returned value's lifetime to `caps`'s. + let contents = caps.get(1).unwrap().as_str(); + BitDescrAttrs::from_str(contents).map(|attrs| { + let all = caps.get(0).unwrap(); // There is always a 0th capture. + (all.start(), all.end(), attrs) + }) + }) +} + +#[derive(Debug)] +struct BitDescrAttrs<'input> { + width: usize, + rows: Vec<(&'input str, Vec>)>, +} + +impl<'input> BitDescrAttrs<'input> { + fn from_str(contents: &'input str) -> Result { + // First, parse the width. + let contents = contents.trim(); + let width_len = contents + .find(|c: char| c.is_ascii_whitespace()) + .ok_or_else(|| Error::msg("{{#bits}} descriptions must describe at least one thing"))?; + let width_str = &contents[..width_len]; + let width = width_str.parse().context(format!( + "Expected bits description to start with width, got \"{}\"", + width_str + ))?; + let s = contents[width_len..].trim_start(); + + // Next, parse the rows! + let mut rows = Vec::new(); + for row_str in s.split_terminator(';') { + let row_str = row_str.trim(); + + fn parse_name(row_str: &str) -> Option { + if !row_str.starts_with('"') { + return None; + } + + row_str[1..] // Skip the leading quote. 
+ .find('"') + } + let Some(name_len) = parse_name(row_str) else { + bail!("Expected row to begin by its name (did you forget to put quotes around it?)"); + }; + let name = &row_str[1..(name_len + 1)]; + let mut row_str = row_str[(name_len + 2)..].trim_start(); // The end is already trimmed. + + // Then, the fields! + let mut fields: Vec = Vec::new(); + while !row_str.is_empty() { + lazy_static! { + // Since mdBook has "smart quotes", be lenient about them. + static ref RE: Regex = + Regex::new(r#"^(\d+)(?:\s*-\s*(\d+))?\s*:\s*"([^"]*)""#).unwrap(); + } + + let Some(cap) = RE.captures(row_str) else { + bail!("Failed to parse field for \"{}\"", row_str); + }; + let end = cap[1].parse().unwrap(); + let start = cap + .get(2) + .map_or(end, |end_match| end_match.as_str().parse().unwrap()); + let name = &cap.get(3).unwrap().as_str(); + + // Perform sanity checks. + if start > end { + bail!( + "Field must end after it started (expected {} <= {})", + start, + end, + ); + } + if let Some(field) = fields.last() { + if field.end <= start { + bail!( + "Field must start after previous ended (expected {} > {})", + field.end, + start, + ); + } + } + + fields.push(BitDescrField { start, end, name }); + // Advance by the match's length, plus any whitespace after it. + row_str = row_str[cap[0].len()..].trim_start(); + } + + rows.push((name, fields)); + } + + Ok(BitDescrAttrs { width, rows }) + } +} + +#[derive(Debug)] +struct BitDescrField<'a> { + start: usize, + end: usize, + name: &'a str, +} diff --git a/src/OAM.md b/src/OAM.md index a0d40c9d..4c39a537 100644 --- a/src/OAM.md +++ b/src/OAM.md @@ -49,14 +49,16 @@ tile is "NN & \$FE", and the bottom 8×8 tile is "NN | \$01". 
## Byte 3 — Attributes/Flags -``` - Bit7 BG and Window over OBJ (0=No, 1=BG and Window colors 1-3 over the OBJ) - Bit6 Y flip (0=Normal, 1=Vertically mirrored) - Bit5 X flip (0=Normal, 1=Horizontally mirrored) - Bit4 Palette number **Non CGB Mode Only** (0=OBP0, 1=OBP1) - Bit3 Tile VRAM-Bank **CGB Mode Only** (0=Bank 0, 1=Bank 1) - Bit2-0 Palette number **CGB Mode Only** (OBP0-7) -``` +{{#bits 8 + "Attributes" 7:"Priority" 6:"Y flip" 5:"X flip" 4:"DMG palette" 3:"Bank" 2-0:"CGB palette"; +}} + +- **Priority**: `0` = No, `1` = BG and Window colors 1-3 over this OBJ +- **Y flip**: `0` = Normal, `1` = Entire OBJ is vertically mirrored +- **X flip**: `0` = Normal, `1` = Entire OBJ is horizontally mirrored +- **DMG palette** *\[Non CGB Mode only\]*: `0` = OBP0, `1` = OBP1 +- **Bank** *\[CGB Mode Only\]*: `0` = Fetch tile in VRAM bank 0, `1` = Fetch tile in VRAM bank 1 +- **CGB palette** *\[CGB Mode Only\]*: Use OBP0-7 ## Writing data to OAM