diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 4c9dcb7a..73d4b72f 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -27,3 +27,11 @@ jobs: git status --ignored exit 1 fi + + tokenizers: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: sudo apt install -y llvm-13-dev clang-13 make valgrind + - run: LLVM_CONFIG=llvm-config-13 make + - run: ./tokenizers.sh diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 6fcad65a..c4a5966d 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -131,3 +131,17 @@ jobs: shell: bash - run: cd "test dir" && ./runtests.sh --verbose shell: bash + + tokenizers: + needs: build + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: windows-zip + - run: unzip jou.zip + - run: mv tokenizers.sh self_hosted jou + shell: bash + - run: (cd jou && ./tokenizers.sh) + shell: bash diff --git a/self_hosted/errors_and_warnings.jou b/self_hosted/errors_and_warnings.jou new file mode 100644 index 00000000..663461d9 --- /dev/null +++ b/self_hosted/errors_and_warnings.jou @@ -0,0 +1,25 @@ +from "stdlib/process.jou" import exit +from "stdlib/io.jou" import stdout, stderr, fprintf, fflush + +struct Location: + path: byte* # Not owned. Points to a string that is held elsewhere. + lineno: int + +def fail(location: Location, message: byte*) -> void: + # When stdout is redirected to same place as stderr, + # make sure that normal printf()s show up before our error. 
+ fflush(stdout) + fflush(stderr) + + fprintf(stderr, "compiler error in file \"%s\"", location.path) + if location.lineno != 0: + fprintf(stderr, ", line %d", location.lineno) + fprintf(stderr, ": %s\n", message) + + exit(1) + +# TODO: doesn't really belong here +def assert(b: bool) -> void: + if not b: + fprintf(stderr, "assertion failed\n") + exit(1) diff --git a/self_hosted/tokenizer.jou b/self_hosted/tokenizer.jou new file mode 100644 index 00000000..bfca58ad --- /dev/null +++ b/self_hosted/tokenizer.jou @@ -0,0 +1,498 @@ +from "stdlib/io.jou" import printf, FILE, fgetc, ferror, fopen +from "stdlib/str.jou" import sprintf, strlen, strchr, strcmp +from "stdlib/mem.jou" import malloc, realloc, free, memset, memmove +from "./errors_and_warnings.jou" import Location, fail, assert + +enum TokenKind: + Int + Long + Float + Double + Byte # example: 'a' is 97 as a byte + String + Name + Keyword + Newline + Indent + Dedent + Operator + EndOfFile # Marks the end of an array of tokens. + +struct Token: + kind: TokenKind + location: Location + + # Only one of these is used at a time. + # TODO: union + int_value: int # Int + long_value: long # Long + byte_value: byte # Byte + indentation_level: int # Newline (indicates how many spaces there are after the newline) + short_string: byte[100] # Name, Keyword, Operator + long_string: byte* # String + +# TODO: import this (#227 maybe?) 
+declare isprint(b: int) -> int + +def print_token(token: Token*) -> void: + if token->kind == TokenKind::Int: + printf("integer %d\n", token->int_value) + elif token->kind == TokenKind::Long: + printf("long %lld\n", token->long_value) + elif token->kind == TokenKind::Float: + printf("float %s\n", &token->short_string[0]) + elif token->kind == TokenKind::Double: + printf("double %s\n", &token->short_string[0]) + elif token->kind == TokenKind::Byte: + printf("character %#02x", token->byte_value) + if isprint(token->byte_value) != 0: + printf(" '%c'", token->byte_value) + printf("\n") + elif token->kind == TokenKind::EndOfFile: + printf("end of file\n") + elif token->kind == TokenKind::Operator: + printf("operator '%s'\n", &token->short_string[0]) + elif token->kind == TokenKind::Name: + printf("name \"%s\"\n", &token->short_string[0]) + elif token->kind == TokenKind::Keyword: + printf("keyword \"%s\"\n", &token->short_string[0]) + elif token->kind == TokenKind::Newline: + printf("newline token (next line has %d spaces of indentation)\n", token->indentation_level) + elif token->kind == TokenKind::String: + printf("string \"%s\"\n", token->long_string) + elif token->kind == TokenKind::Indent: + printf("indent (+4 spaces)\n") + elif token->kind == TokenKind::Dedent: + printf("dedent (-4 spaces)\n") + else: + printf("????\n") + +struct Tokenizer: + f: FILE* + location: Location + pushback: byte* + pushback_len: int # TODO: dynamic array + # Parens array isn't dynamic, so that you can't segfault + # the compiler by feeding it lots of nested parentheses, + # which would make it recurse too deep. + parens: Token[50] + parens_len: int + +def read_byte(self: Tokenizer*) -> byte: + EOF = -1 # FIXME + + c: byte + if self->pushback_len > 0: + c = self->pushback[--self->pushback_len] + else: + temp = fgetc(self->f) + if temp == '\r': + # On Windows, \r just before \n is ignored. + temp = fgetc(self->f) + if temp != EOF and temp != '\n': + # TODO: test this, if possible? 
+ fail(self->location, "source file contains a CR byte ('\\r') that isn't a part of a CRLF line ending") + + if temp == EOF: + if ferror(self->f) != 0: + # TODO: include errno in the error message + fail(self->location, "cannot read file") + # Use the zero byte to denote end of file. + c = '\0' + elif temp == '\0': + # TODO: test this + fail(self->location, "source file contains a zero byte") + c = 'x' # TODO: silences compiler warning, but never runs + else: + c = temp as byte + + if c == '\n': + self->location.lineno++ + return c + + +def unread_byte(self: Tokenizer*, b: byte) -> void: + if b == '\0': + return + + assert(b != '\r') + self->pushback = realloc(self->pushback, self->pushback_len + 1) + self->pushback[self->pushback_len++] = b + if b == '\n': + self->location.lineno-- + +def is_identifier_or_number_byte(b: byte) -> bool: + return ( + ('A' <= b and b <= 'Z') + or ('a' <= b and b <= 'z') + or ('0' <= b and b <= '9') + or b == '_' + ) + +def read_identifier_or_number(self: Tokenizer*, first_byte: byte) -> byte[100]: + dest: byte[100] + memset(&dest, 0, sizeof dest) + destlen = 0 + + assert(is_identifier_or_number_byte(first_byte)) + dest[destlen++] = first_byte + + while True: + b = read_byte(self) + if is_identifier_or_number_byte(b): + if destlen == sizeof dest - 1: + fail(self->location, "name or number is too long") + dest[destlen++] = b + else: + unread_byte(self, b) + return dest + +def consume_rest_of_line(self: Tokenizer*) -> void: + while True: + c = read_byte(self) + if c == '\0' or c == '\n': + break + +# Returns the indentation level for the next line +def read_newline_token(self: Tokenizer*) -> int: + level = 0 + while True: + c = read_byte(self) + if c == '\0': + # End of file. Do not validate that indentation is a + # multiple of 4 spaces. Add a trailing newline implicitly + # if needed. 
+            #
+            # TODO: test this
+            return 0
+        elif c == '\n':
+            level = 0
+        elif c == '#':
+            consume_rest_of_line(self)
+            level = 0
+        elif c == ' ':
+            level++
+        else:
+            unread_byte(self, c)
+            return level
+
+def read_string(self: Tokenizer*) -> byte*:
+    result: byte* = NULL
+    len = 0
+
+    while True:
+        c = read_byte(self)
+        if c == '"':
+            break
+        elif c == '\n' or c == '\0':
+            if c == '\n':
+                self->location.lineno--
+            fail(self->location, "missing \" to end the string")
+        elif c == '\\':
+            # \n means newline, for example
+            after_backslash = read_byte(self)
+            if after_backslash == '\0':
+                fail(self->location, "missing \" to end the string")
+            elif after_backslash == 'n':
+                result = realloc(result, len+1)
+                result[len++] = '\n'
+            elif after_backslash == 'r':
+                result = realloc(result, len+1)
+                result[len++] = '\r'
+            elif after_backslash == '\\' or after_backslash == '"':
+                result = realloc(result, len+1)
+                result[len++] = after_backslash
+            elif after_backslash == '0':
+                fail(self->location, "strings cannot contain zero bytes (\\0), because that is the special end marker byte")
+            elif '0' <= after_backslash and after_backslash <= '9':
+                result = realloc(result, len+1)
+                result[len++] = after_backslash - '0'
+            elif after_backslash == '\n':
+                # \ at end of line, string continues on next line
+                len = len  # TODO: pass statement
+            else:
+                if after_backslash < 0x80 and isprint(after_backslash) != 0:
+                    message: byte* = malloc(100)
+                    sprintf(message, "unknown escape: '\\%c'", after_backslash)
+                    fail(self->location, message)
+                else:
+                    fail(self->location, "unknown '\\' escape")
+        else:
+            result = realloc(result, len+1)
+            result[len++] = c
+
+    result = realloc(result, len+1)
+    result[len] = '\0'
+    return result
+
+def is_operator_byte(c: byte) -> bool:
+    return c != '\0' and strchr("=<>!.,()[]{};:+-*/&%", c) != NULL
+
+declare strncmp(s1: byte*, s2: byte*, n: long) -> int
+
+# TODO: move to stdlib
+def starts_with(s: byte*, prefix: byte*) -> bool:
+    return strncmp(s, prefix, 
strlen(prefix)) == 0 + +def read_operator(self: Tokenizer*) -> byte[100]: + # TODO: nicer array syntax + operators: byte*[100] + i = 0 + # Longer operators first, so that '==' does not parse as '=' '=' + operators[i++] = "..." + operators[i++] = "===" + operators[i++] = "!==" + operators[i++] = "==" + operators[i++] = "!=" + operators[i++] = "->" + operators[i++] = "<=" + operators[i++] = ">=" + operators[i++] = "++" + operators[i++] = "--" + operators[i++] = "+=" + operators[i++] = "-=" + operators[i++] = "*=" + operators[i++] = "/=" + operators[i++] = "%=" + operators[i++] = "::" + operators[i++] = "." + operators[i++] = "," + operators[i++] = ":" + operators[i++] = ";" + operators[i++] = "=" + operators[i++] = "(" + operators[i++] = ")" + operators[i++] = "{" + operators[i++] = "}" + operators[i++] = "[" + operators[i++] = "]" + operators[i++] = "&" + operators[i++] = "%" + operators[i++] = "*" + operators[i++] = "/" + operators[i++] = "+" + operators[i++] = "-" + operators[i++] = "<" + operators[i++] = ">" + operators[i] = NULL + + operator: byte[100] + memset(&operator, 0, sizeof operator) + + # Read as many operator characters as we may need. + while strlen(&operator[0]) < 3: + c = read_byte(self) + if not is_operator_byte(c): + unread_byte(self, c) + break + operator[strlen(&operator[0])] = c + + for op = &operators[0]; *op != NULL; op++: + if starts_with(&operator[0], *op): + # Unread the bytes we didn't use. + while strlen(&operator[0]) > strlen(*op): + last = &operator[strlen(&operator[0]) - 1] + unread_byte(self, *last) + *last = '\0' + + # "===" and "!==" are here only to give a better error message to javascript people. 
+ if strcmp(&operator[0], "===") != 0 and strcmp(&operator[0], "!==") != 0: + return operator + + message: byte[100] + sprintf(&message[0], "there is no '%s' operator", &operator[0]) + fail(self->location, &message[0]) + return operator # TODO: never actually runs, but causes a compiler warning + +def is_keyword(word: byte*) -> bool: + # TODO: better array syntax + keywords: byte*[100] + i = 0 + keywords[i++] = "from" + keywords[i++] = "import" + keywords[i++] = "def" + keywords[i++] = "declare" + keywords[i++] = "struct" + keywords[i++] = "enum" + keywords[i++] = "global" + keywords[i++] = "return" + keywords[i++] = "if" + keywords[i++] = "elif" + keywords[i++] = "else" + keywords[i++] = "while" + keywords[i++] = "for" + keywords[i++] = "break" + keywords[i++] = "continue" + keywords[i++] = "True" + keywords[i++] = "False" + keywords[i++] = "NULL" + keywords[i++] = "and" + keywords[i++] = "or" + keywords[i++] = "not" + keywords[i++] = "as" + keywords[i++] = "sizeof" + keywords[i++] = "void" + keywords[i++] = "bool" + keywords[i++] = "byte" + keywords[i++] = "int" + keywords[i++] = "long" + keywords[i++] = "float" + keywords[i++] = "double" + keywords[i++] = NULL + + for kw = &keywords[0]; *kw != NULL; kw++: + if strcmp(*kw, word) == 0: + return True + return False + +# TODO: move to stdlib +declare atoi(s: byte*) -> int + +def read_token(self: Tokenizer*) -> Token: + while True: + token = Token{location = self->location} + b = read_byte(self) + if b == ' ': + continue + + if b == '\n': + if self->parens_len > 0: + continue + token.kind = TokenKind::Newline + token.indentation_level = read_newline_token(self) + elif b == '"': + token.kind = TokenKind::String + token.long_string = read_string(self) + elif is_identifier_or_number_byte(b): + token.short_string = read_identifier_or_number(self, b) + if is_keyword(&token.short_string[0]): + token.kind = TokenKind::Keyword + elif '0' <= token.short_string[0] and token.short_string[0] <= '9': + # TODO: support various 
other things + token.kind = TokenKind::Int + token.int_value = atoi(&token.short_string[0]) + else: + token.kind = TokenKind::Name + elif is_operator_byte(b): + unread_byte(self, b) + token.kind = TokenKind::Operator + token.short_string = read_operator(self) + elif b == '\0': + token.kind = TokenKind::EndOfFile + else: + message: byte[100] + sprintf(&message[0], "unexpected byte %#02x", b) + fail(self->location, &message[0]) + return token + +def tokenize_without_indent_dedent_tokens(file: FILE*, path: byte*) -> Token*: + tokenizer = Tokenizer{ + location = Location{path = path}, + f = file, + } + + # Add a fake newline to the beginning. It does a few things: + # * Less special-casing: blank lines in the beginning of the file can + # cause there to be a newline token anyway. + # * It is easier to detect an unexpected indentation in the beginning + # of the file, as it becomes just like any other indentation. + # * Line numbers start at 1. + tokenizer.pushback = malloc(1) + tokenizer.pushback[0] = '\n' + tokenizer.pushback_len = 1 + + tokens: Token* = NULL + len = 0 + while len == 0 or tokens[len-1].kind != TokenKind::EndOfFile: + tokens = realloc(tokens, sizeof(tokens[0]) * (len+1)) + tokens[len++] = read_token(&tokenizer) + + free(tokenizer.pushback) + return tokens + +# Creates a new array of tokens with indent/dedent tokens added after +# newline tokens that change the indentation level. +def handle_indentations(raw_tokens: Token*) -> Token*: + tokens: Token* = NULL + ntokens = 0 + level = 0 + + for t = raw_tokens; True; t++: + if t->kind == TokenKind::EndOfFile: + # Add an extra newline token at end of file and the dedents after it. + # This makes it similar to how other newline and dedent tokens work: + # the dedents always come after a newline token. 
+ tokens = realloc(tokens, sizeof tokens[0] * (ntokens + level/4 + 1)) + while level != 0: + tokens[ntokens++] = Token{location = t->location, kind = TokenKind::Dedent} + level -= 4 + tokens[ntokens++] = *t + break + + tokens = realloc(tokens, sizeof tokens[0] * (ntokens+1)) + tokens[ntokens++] = *t + + if t->kind == TokenKind::Newline: + after_newline = t->location + after_newline.lineno++ + + if t->indentation_level % 4 != 0: + fail(after_newline, "indentation must be a multiple of 4 spaces") + + while level < t->indentation_level: + tokens = realloc(tokens, sizeof tokens[0] * (ntokens+1)) + tokens[ntokens++] = Token{location = after_newline, kind = TokenKind::Indent} + level += 4 + + while level > t->indentation_level: + tokens = realloc(tokens, sizeof tokens[0] * (ntokens+1)) + tokens[ntokens++] = Token{location = after_newline, kind = TokenKind::Dedent} + level -= 4 + + # Delete the newline token in the beginning. + # + # If the file has indentations after it, they are now represented by separate + # indent tokens and parsing will fail. If the file doesn't have any blank/comment + # lines in the beginning, it has a newline token anyway to avoid special casing. 
+ assert(tokens[0].kind == TokenKind::Newline) + memmove(&tokens[0], &tokens[1], sizeof tokens[0] * (ntokens - 1)) + + return tokens + +def tokenize(path: byte*) -> Token*: + file = fopen(path, "rb") + if file == NULL: + # TODO: test this + # TODO: include errno in the message + fail(Location{path=path}, "cannot open file") + + raw_tokens = tokenize_without_indent_dedent_tokens(file, path) + better_tokens = handle_indentations(raw_tokens) + free(raw_tokens) + return better_tokens + +def print_tokens(tokens: Token*) -> void: + printf("===== Tokens for file \"%s\" =====\n", tokens->location.path) + t = tokens + current_lineno = -1 + + while True: + if t->location.lineno != current_lineno: + current_lineno = t->location.lineno + printf("\nLine %d:\n", current_lineno) + + printf(" ") + print_token(t) + + if t->kind == TokenKind::EndOfFile: + break + t++ + + printf("\n") + +def main(argc: int, argv: byte**) -> int: + assert(argc == 2) + tokens = tokenize(argv[1]) + print_tokens(tokens) + free(tokens) + return 0 diff --git a/self_hosted/tokenizes_wrong.txt b/self_hosted/tokenizes_wrong.txt new file mode 100644 index 00000000..b54eac84 --- /dev/null +++ b/self_hosted/tokenizes_wrong.txt @@ -0,0 +1,186 @@ +# This is a list of files that are not yet supported by the tokenizer of the self-hosted compiler. 
+examples/fib.jou +examples/x11_window.jou +tests/syntax_error/import_after_def.jou +tests/syntax_error/missing_return_type.jou +tests/syntax_error/missing_arg_type.jou +tests/syntax_error/declare_global_with_value.jou +tests/syntax_error/arg_after_dotdotdot.jou +tests/syntax_error/chained_eq.jou +tests/syntax_error/bad_type.jou +tests/syntax_error/missing_import_keyword.jou +tests/syntax_error/missing_second_equal_sign.jou +tests/syntax_error/hex.jou +tests/syntax_error/double_assignment.jou +tests/syntax_error/double_with_letters_after.jou +tests/syntax_error/dot_after_e.jou +tests/syntax_error/unnecessary_zero.jou +tests/syntax_error/bad_addressof.jou +tests/syntax_error/bin.jou +tests/syntax_error/arg_default.jou +tests/syntax_error/def_missing_args.jou +tests/syntax_error/bad_expression.jou +tests/syntax_error/missing_field_names.jou +tests/syntax_error/bad_field.jou +tests/syntax_error/string_zero_byte.jou +tests/syntax_error/triple_equals.jou +tests/syntax_error/import_missing_quotes.jou +tests/syntax_error/unknown_escape_ascii.jou +tests/syntax_error/bad_struct_field_name.jou +tests/syntax_error/python_style_for.jou +tests/syntax_error/import1.jou +tests/syntax_error/and_or_chaining.jou +tests/syntax_error/bad_function_name_after_def.jou +tests/syntax_error/empty_char.jou +tests/syntax_error/import_missing_comma_with_parens.jou +tests/syntax_error/2bad.jou +tests/syntax_error/array_size.jou +tests/syntax_error/multidot_float.jou +tests/syntax_error/0b2.jou +tests/syntax_error/ee.jou +tests/syntax_error/overlong_char.jou +tests/syntax_error/dotdotdot_dotdotdot.jou +tests/syntax_error/bad_byte.jou +tests/syntax_error/first_line_indent.jou +tests/syntax_error/too_many_closing_parens.jou +tests/syntax_error/indentation_not4.jou +tests/syntax_error/import_missing_dot.jou +tests/syntax_error/unknown_escape_multibyte.jou +tests/syntax_error/infinite_c_style_for.jou +tests/syntax_error/struct_missing_type.jou +tests/syntax_error/bad_toplevel_declaration.jou 
+tests/syntax_error/missing_indentation.jou +tests/syntax_error/import_missing_comma.jou +tests/syntax_error/bad_argument_name.jou +tests/syntax_error/mismatched_close_brace.jou +tests/syntax_error/bad_statement.jou +tests/syntax_error/indexing.jou +tests/syntax_error/struct_init_js_syntax.jou +tests/syntax_error/missing_colon.jou +tests/syntax_error/missing_number_after_eminus.jou +tests/syntax_error/bad_struct_name.jou +tests/syntax_error/missing_number_after_e.jou +tests/syntax_error/chained_le.jou +tests/syntax_error/float.jou +tests/syntax_error/double_not.jou +tests/syntax_error/too_many_opening_parens.jou +tests/syntax_error/struct_default.jou +tests/crash/null_deref.jou +tests/wrong_type/assign_void.jou +tests/wrong_type/arg.jou +tests/wrong_type/assign_with_type.jou +tests/wrong_type/while.jou +tests/wrong_type/var_assignment.jou +tests/wrong_type/cannot_be_indexed.jou +tests/wrong_type/float_and_double.jou +tests/wrong_type/struct_member_assign.jou +tests/wrong_type/deref_non_pointer.jou +tests/wrong_type/elif.jou +tests/wrong_type/int_to_enum.jou +tests/wrong_type/arrow_operator_not_struct.jou +tests/wrong_type/enum_member_from_struct.jou +tests/wrong_type/enum_to_int.jou +tests/wrong_type/assign_to_deref_non_pointer.jou +tests/wrong_type/index.jou +tests/wrong_type/dot_operator.jou +tests/wrong_type/neg.jou +tests/wrong_type/array_to_ptr.jou +tests/wrong_type/arg_with_varargs.jou +tests/wrong_type/inplace_add_doesnt_go_back.jou +tests/wrong_type/mod.jou +tests/wrong_type/pointer_assignment.jou +tests/wrong_type/for.jou +tests/wrong_type/bool_main.jou +tests/wrong_type/plusplus.jou +tests/wrong_type/not.jou +tests/wrong_type/brace_init_arg.jou +tests/wrong_type/return_value.jou +tests/wrong_type/if.jou +tests/wrong_type/struct_member_init.jou +tests/wrong_type/void_main.jou +tests/wrong_type/array_vararg.jou +tests/wrong_type/pointer_eq.jou +tests/wrong_type/or.jou +tests/wrong_type/arrow_operator_not_pointer.jou +tests/should_succeed/assign.jou 
+tests/should_succeed/enum.jou +tests/should_succeed/string_syntax.jou +tests/should_succeed/octalnuber.jou +tests/should_succeed/sizeof.jou +tests/should_succeed/global_bug.jou +tests/should_succeed/crlf.jou +tests/should_succeed/loops.jou +tests/should_succeed/add_sub_mul_div_mod.jou +tests/should_succeed/printf.jou +tests/should_succeed/undefined_value_warning.jou +tests/should_succeed/global.jou +tests/should_succeed/pointer.jou +tests/should_succeed/mathlibtest.jou +tests/should_succeed/sscanf.jou +tests/should_succeed/plusplus_minusminus.jou +tests/should_succeed/stderr.jou +tests/should_succeed/return_void.jou +tests/should_succeed/unreachable_warning.jou +tests/should_succeed/local_import.jou +tests/should_succeed/array.jou +tests/should_succeed/compare.jou +tests/should_succeed/and_or_not.jou +tests/should_succeed/struct.jou +tests/should_succeed/argument.jou +tests/should_succeed/compiler_cli.jou +tests/should_succeed/file.jou +tests/should_succeed/expfloat.jou +tests/should_succeed/implicit_conversions.jou +tests/should_succeed/return_string.jou +tests/should_succeed/as.jou +tests/should_succeed/if_elif_else.jou +tests/should_succeed/unused_import.jou +tests/other_errors/missing_return.jou +tests/other_errors/brace_init_dupe.jou +tests/other_errors/double_plusplus.jou +tests/other_errors/array0.jou +tests/other_errors/address_of_minusminus.jou +tests/other_errors/function_wrong_n_args.jou +tests/other_errors/duplicate_enum_member.jou +tests/other_errors/continue_outside_loop.jou +tests/other_errors/varargs_def.jou +tests/other_errors/runtime_return_1.jou +tests/other_errors/dumb_assignment.jou +tests/other_errors/dynamic_array_length.jou +tests/other_errors/var_shadow.jou +tests/other_errors/address_of_array_indexing.jou +tests/other_errors/redefine_imported_func.jou +tests/other_errors/struct_already_exists.jou +tests/other_errors/imported_error.jou +tests/other_errors/break_outside_loop.jou +tests/other_errors/void_as_type.jou 
+tests/other_errors/dumb_assignment_with_plusequals.jou +tests/other_errors/using_void_function.jou +tests/other_errors/immediate_member_assign.jou +tests/other_errors/unexpected_return_value.jou +tests/other_errors/duplicate_arg_name.jou +tests/other_errors/missing_value_in_return.jou +tests/other_errors/duplicate_field_name.jou +tests/404/enum.jou +tests/404/function.jou +tests/404/var.jou +tests/404/type.jou +tests/404/import_wrong_func.jou +tests/404/struct_field.jou +tests/404/enum_member.jou +tests/404/import_symbol.jou +tests/404/var_addressof.jou +tests/404/file.jou +tests/404/import_symbol_multiline.jou +tests/already_exists_error/global_var.jou +tests/already_exists_error/struct_import.jou +tests/already_exists_error/global_var_import.jou +tests/already_exists_error/struct.jou +tests/already_exists_error/func.jou +tests/already_exists_error/local_var.jou +tests/already_exists_error/func_import.jou +tests/already_exists_error/struct_and_enum.jou +tests/too_long/long.jou +tests/too_long/nested_parentheses.jou +tests/too_long/name.jou +tests/too_long/int.jou diff --git a/src/jou_compiler.h b/src/jou_compiler.h index 69f99104..bbdadb15 100644 --- a/src/jou_compiler.h +++ b/src/jou_compiler.h @@ -49,6 +49,7 @@ typedef struct CfInstruction CfInstruction; struct CommandLineFlags { bool verbose; // Whether to print a LOT of debug info + bool tokenize_only; // If true, tokenize the file passed on command line and don't actually compile anything int optlevel; // Optimization level (0 don't optimize, 3 optimize a lot) const char *outfile; // If not NULL, where to output executable const char *linker_flags; // String that is appended to linking command diff --git a/src/main.c b/src/main.c index 1950d5f6..d5e385da 100644 --- a/src/main.c +++ b/src/main.c @@ -42,6 +42,7 @@ static const char help_fmt[] = " -o OUTFILE output an executable file, don't run the code\n" " -O0/-O1/-O2/-O3 set optimization level (0 = default, 3 = runs fastest)\n" " --verbose display a lot of 
information about all compilation steps\n" + " --tokenize-only display only the output of the tokenizer, don't do anything else\n" " --linker-flags appended to the linker command, so you can use external libraries\n" ; @@ -76,6 +77,13 @@ static void parse_arguments(int argc, char **argv, CommandLineFlags *flags, cons } else if (!strcmp(argv[i], "--verbose")) { flags->verbose = true; i++; + } else if (!strcmp(argv[i], "--tokenize-only")) { + if (argc > 3) { + fprintf(stderr, "%s: --tokenize-only cannot be used together with other flags", argv[0]); + goto wrong_usage; + } + flags->tokenize_only = true; + i++; } else if (!strcmp(argv[i], "--linker-flags")) { if (flags->linker_flags) { fprintf(stderr, "%s: --linker-flags cannot be given multiple times", argv[0]); @@ -157,6 +165,18 @@ static struct FileState *find_file(const struct CompileState *compst, const char return NULL; } +static FILE *open_the_file(const char *path, const Location *import_location) +{ + FILE *f = fopen(path, "rb"); + if (!f) { + if (import_location) + fail_with_error(*import_location, "cannot import from \"%s\": %s", path, strerror(errno)); + else + fail_with_error((Location){.filename=path}, "cannot open file: %s", strerror(errno)); + } + return f; +} + static void parse_file(struct CompileState *compst, const char *filename, const Location *import_location) { if (find_file(compst, filename)) @@ -164,15 +184,10 @@ static void parse_file(struct CompileState *compst, const char *filename, const struct FileState fs = { .path = strdup(filename) }; - FILE *f = fopen(fs.path, "rb"); - if (!f) { - if (import_location) - fail_with_error(*import_location, "cannot import from \"%s\": %s", filename, strerror(errno)); - else - fail_with_error((Location){.filename=filename}, "cannot open file: %s", strerror(errno)); - } + FILE *f = open_the_file(fs.path, import_location); Token *tokens = tokenize(f, fs.path); fclose(f); + if(compst->flags.verbose) print_tokens(tokens); @@ -394,6 +409,15 @@ int main(int 
argc, char **argv) printf("Data layout: %s\n", get_target()->data_layout); } + if (compst.flags.tokenize_only) { + FILE *f = open_the_file(filename, NULL); + Token *tokens = tokenize(f, filename); + fclose(f); + print_tokens(tokens); + free_tokens(tokens); + return 0; + } + #ifdef _WIN32 char *startup_path = malloc(strlen(compst.stdlib_path) + 50); sprintf(startup_path, "%s/_windows_startup.jou", compst.stdlib_path); diff --git a/src/print.c b/src/print.c index 215fca53..4588c080 100644 --- a/src/print.c +++ b/src/print.c @@ -93,10 +93,10 @@ void print_token(const Token *token) printf("end of file\n"); break; case TOKEN_INDENT: - printf("more indentation (+4 spaces)\n"); + printf("indent (+4 spaces)\n"); break; case TOKEN_DEDENT: - printf("less indentation (-4 spaces)\n"); + printf("dedent (-4 spaces)\n"); break; case TOKEN_OPERATOR: printf("operator '%s'\n", token->data.operator); diff --git a/stdlib/io.jou b/stdlib/io.jou index 5a13461f..842e972b 100644 --- a/stdlib/io.jou +++ b/stdlib/io.jou @@ -60,6 +60,10 @@ declare fprintf(file: FILE *, pattern: byte*, ...) -> int declare fgetc(file: FILE*) -> int # see getchar() declare fscanf(file: FILE*, pattern: byte*, ...) -> int +# Ensure that output is actually written. It may remain buffered +# if this function isn't called. +declare fflush(file: FILE*) -> int + # Read a line of text from file into a string starting at the given # pointer. Reading stops at newline character, end of file, on error, # or when the resulting string (including the '\0') wouldn't fit @@ -68,5 +72,9 @@ declare fscanf(file: FILE*, pattern: byte*, ...) -> int # Return value: NULL on error, same as destination on success. declare fgets(destination: byte*, n: int, file: FILE*) -> byte* +# TODO: document +declare feof(file: FILE*) -> int +declare ferror(file: FILE*) -> int + # Move back to beginning of file. 
declare rewind(file: FILE*) -> void diff --git a/stdlib/mem.jou b/stdlib/mem.jou index 94250f58..bf303cfc 100644 --- a/stdlib/mem.jou +++ b/stdlib/mem.jou @@ -3,6 +3,10 @@ # Heap allocations # TODO: write a tutorial about using these and add a link declare malloc(size: long) -> void* +declare realloc(ptr: void*, size: long) -> void* declare free(ptr: void*) -> void +# TODO: explain what each of these does +declare memset(dest: void*, fill_byte: int, count: long) -> void* declare memcpy(dest: void*, source: void*, count: long) -> void* +declare memmove(dest: void*, source: void*, count: long) -> void* diff --git a/stdlib/str.jou b/stdlib/str.jou index bdbe565d..0fb56698 100644 --- a/stdlib/str.jou +++ b/stdlib/str.jou @@ -11,3 +11,12 @@ declare snprintf(dest: byte*, n: long, pattern: byte*, ...) -> int # Find a substring. Return a pointer to the occurrence in haystack, or NULL if not found. declare strstr(haystack: byte*, needle: byte*) -> byte* + +# Similar to strstr(), but searches for a single byte rather than a substring. +declare strchr(haystack: byte*, needle: byte) -> byte* + +# Calculate the length of a string in bytes. Note that strlen("รถ") == 2, for example. +declare strlen(s: byte*) -> long + +# Compare the strings. Return 0 for equal, or nonzero for not equal. 
+declare strcmp(s1: byte*, s2: byte*) -> int diff --git a/tests/should_succeed/compiler_cli.jou b/tests/should_succeed/compiler_cli.jou index 1dfe419b..328908af 100644 --- a/tests/should_succeed/compiler_cli.jou +++ b/tests/should_succeed/compiler_cli.jou @@ -30,6 +30,7 @@ def main() -> int: run_jou("lolwat.jou") # Output: compiler error in file "lolwat.jou": cannot open file: No such file or directory run_jou("--linker-flags") # Output: : there must be a string of flags after --linker-flags (try " --help") run_jou("--linker-flags x --linker-flags y") # Output: : --linker-flags cannot be given multiple times (try " --help") + run_jou("--tokenize-only -O1 examples/hello.jou") # Output: : --tokenize-only cannot be used together with other flags (try " --help") # Output: Usage: # Output: [-o OUTFILE] [-O0|-O1|-O2|-O3] [--verbose] [--linker-flags "..."] FILENAME @@ -40,6 +41,7 @@ def main() -> int: # Output: -o OUTFILE output an executable file, don't run the code # Output: -O0/-O1/-O2/-O3 set optimization level (0 = default, 3 = runs fastest) # Output: --verbose display a lot of information about all compilation steps + # Output: --tokenize-only display only the output of the tokenizer, don't do anything else # Output: --linker-flags appended to the linker command, so you can use external libraries run_jou("--help") diff --git a/tokenizers.sh b/tokenizers.sh new file mode 100755 index 00000000..86b14f5d --- /dev/null +++ b/tokenizers.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# There are two Jou compilers: one written in C and another written in Jou. +# They should be able to tokenize each Jou file in exactly the same way. +# If tokenizing a Jou file fails, both tokenizers should fail with the same error message. + +if [[ "$OS" =~ Windows ]]; then + dotexe=.exe +else + dotexe= +fi + +set -e + +rm -rf tmp/tokenizers +mkdir -vp tmp/tokenizers + +echo "Compiling the self-hosted compiler..." 
+./jou${dotexe} -O1 -o tmp/tokenizers/self_hosted${dotexe} self_hosted/tokenizer.jou
+
+for file in $(find examples tests -name '*.jou'); do
+    echo $file
+    (./jou${dotexe} --tokenize-only $file || true) &> tmp/tokenizers/compiler_written_in_c.txt
+    (tmp/tokenizers/self_hosted${dotexe} $file || true) &> tmp/tokenizers/self_hosted.txt
+
+    if grep -qxF $file self_hosted/tokenizes_wrong.txt; then
+        # The file is skipped, so the two compilers should behave differently
+        if diff tmp/tokenizers/compiler_written_in_c.txt tmp/tokenizers/self_hosted.txt >/dev/null; then
+            echo "  Error: Tokenizers behave the same even though the file is listed in self_hosted/tokenizes_wrong.txt."
+            echo "  To fix this error, delete the \"$file\" line from self_hosted/tokenizes_wrong.txt."
+            exit 1
+        else
+            echo "  Tokenizers behave differently as expected (listed in self_hosted/tokenizes_wrong.txt)"
+        fi
+    else
+        if diff -u --color=always tmp/tokenizers/compiler_written_in_c.txt tmp/tokenizers/self_hosted.txt; then
+            echo "  Tokenizers behave the same as expected"
+        else
+            echo "  Error: Tokenizers behave differently when given \"$file\"."
+            echo "  You can silence this error by adding \"$file\" to self_hosted/tokenizes_wrong.txt."
+            echo "  Ideally the tokenizers would behave in the same way for all files, but we aren't there yet."
+            exit 1
+        fi
+    fi
+done
+
+echo ""
+echo ""
+echo "success :)"