From 47a1590b2edb862519b58cd08c9678b77a607ac9 Mon Sep 17 00:00:00 2001
From: ZeroIntensity <zintensitydev@gmail.com>
Date: Tue, 5 Mar 2024 16:46:34 -0500
Subject: [PATCH] add gen.py

---
 .gitignore |   1 -
 gen.py     | 495 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 495 insertions(+), 1 deletion(-)
 create mode 100644 gen.py

diff --git a/.gitignore b/.gitignore
index 151193b..609fb96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,6 @@ vgcore.*
 _pointers.cpython*
 *.egg-info/
 wheelhouse/
-gen.py
 ext/
 *.o
 compile_flags.txt
diff --git a/gen.py b/gen.py
new file mode 100644
index 0000000..935c819
--- /dev/null
+++ b/gen.py
@@ -0,0 +1,495 @@
+# this file shouldn't be PEP 8 checked
+from __future__ import annotations
+
+import asyncio
+import ctypes
+import os
+import re
+import sysconfig
+from contextlib import suppress
+
+import aiofiles  # type: ignore
+import aiohttp
+import requests
+from bs4 import BeautifulSoup, Tag
+
+from src.pointers.std_structs import STRUCT_MAP
+
+PAGES: dict[str, BeautifulSoup] = {}  # parsed docs pages, keyed by page path
+BASE_URL: str = "https://docs.python.org/3.11/c-api"
+C_FUNC = re.compile(  # one-line C prototype; groups: 1 return type, 6 name, 7 parameter list, 12 trailing ", ..."
+    r"^(((.+) )?(\w+(\**)*)) (\w+)\(((((.+ \w+(\[\])?,?)*(, ?\.\.\.)?))|void)\)+$"
+)
+COMMENT = re.compile(r"\/\*.*?\*\/")  # non-greedy: headers are collapsed to one line before use, so ".*" would swallow all code between the first and last comment
+
+
+def ct(data: str) -> str:  # source text for the ctypes attribute `data`
+    return "ctypes.{}".format(data)
+
+
+def ctc(data: str) -> str:  # source text for the ctypes type `c_<data>`
+    return "ctypes.c_{}".format(data)
+
+
+def ctp(data: str) -> str:  # source text for a ctypes pointer to `data`
+    return "ctypes.POINTER({})".format(data)
+
+
+WCHAR_P = ctc("wchar_p")  # these are source-text snippets ("ctypes.c_wchar_p"), not ctypes objects
+CHAR_P = ctc("char_p")
+VOID_P = ctc("void_p")
+WCHAR = ctc("wchar")
+DOUBLE_QUOTE: str = '"'  # quote characters as constants, used inside f-string expressions below
+TRIPLE_QUOTE: str = '"""'
+SSIZE = ctc("ssize_t")
+INT = ctc("int")
+
+C_TYPES = {  # C type spelling -> source text written into the generated _register(...) calls
+    "void": "None",
+    "PyObject*": ct("py_object"),
+    "int": INT,
+    "void*": VOID_P,
+    "Py_ssize_t": SSIZE,
+    "char": ctc("char"),
+    "char*": CHAR_P,
+    "const char*": CHAR_P,
+    "unsigned long": ctc("ulong"),
+    "unsigned long long": ctc("ulonglong"),
+    "unsigned int": ctc("uint"),
+    "long long": ctc("longlong"),
+    "size_t": ctc("size_t"),
+    "double": ctc("double"),
+    "long": ctc("long"),
+    "uint64_t": ctc("uint64"),
+    "int64_t": ctc("int64"),
+    # docs have invalid definitions of wchar apparently
+    "wchar*": WCHAR_P,
+    "wchar_t*": WCHAR_P,
+    "w_char*": WCHAR_P,
+    "va_list": VOID_P,
+    "wchar_t": WCHAR,
+    "PyTypeObject": "PyTypeObject",  # bare names like this one are emitted as-is into the generated code
+    "Py_UCS4": "Py_UCS4",
+    "PyThreadState": "PyThreadState",
+    "PyVarObject": "PyVarObject",
+    "PyFrameObject": "PyFrameObject",
+    "PyInterpreterState": "PyInterpreterState",
+    "PyType_Spec": "PyType_Spec",
+    "Py_tss_t": "Py_tss_t",
+    "Py_hash_t": SSIZE,
+    "Py_buffer": "Py_buffer",
+    "PyOS_sighandler_t": VOID_P,
+    "PyGILState_STATE": INT,
+    "PyModuleDef": "PyModuleDef",
+    "struct PyModuleDef": "PyModuleDef",  # NOTE(review): _gen_str removes "struct " before lookups, so the "struct ..." keys look unreachable from there
+    "PyCodeObject": "PyCodeObject",
+    "PyCapsule_Destructor": VOID_P,
+    "PyGILState": INT,
+    "PyMethodDef": "PyMethodDef",
+    "PyGetSetDef": "PyGetSetDef",
+    "struct PyMethodDef*": "ctypes.POINTER(PyMethodDef)",
+    "struct PyGetSetDef*": "ctypes.POINTER(PyGetSetDef)",
+    "FILE*": VOID_P,
+    "PySendResult": INT
+}
+
+CT_TYPES = {  # ctypes type name without its "c_" prefix -> annotation used in generated bindings
+    "char_p": "StringLike",
+    "wchar_p": "str",
+    "wchar": "str",
+    "long": "int",
+    "longlong": "int",
+    "size_t": "int",
+    "ssize_t": "int",
+    "int": "int",
+    "uint64": "int",
+    "int64": "int",
+    "uint": "int",
+    "ulong": "int",
+    "ulonglong": "int",
+    "py_object": "PyObjectLike",
+    "void_p": "PointerLike",
+    "char": "CharLike",
+    "double": "float",  # ctypes.c_double converts to and from a Python float, not an int
+}
+
+NEWLINE = "\n"  # named so it can be used inside an f-string expression (see main)
+# C name with its section prefix removed -> Python method name, bypassing the automatic conversion
+HARDCODED_NAMES: dict[str, str] = {
+    "GC_IsTracked": "gc_is_tracked",
+    "GC_Track": "gc_track",
+    "GC_UnTrack": "gc_untrack",
+    "GC_IsFinalized": "gc_is_finalized",
+    "GC_Del": "gc_del",
+}
+# acronyms that main() lowercases as a unit when deriving method names
+NAME_GROUPS: list[str] = ["ASCII", "UTF", "UCS", "FS"]
+
+
+def not_found(item: str, func: str) -> None:  # report a C type that _get_type could not translate
+    print("Not found...", item, "in", func, sep=" ")
+
+
+def _write_autogen(file: str, text: str) -> None:
+    """Replace everything after the `# autogenerated` marker line of a module with `text`."""
+    path = f"./src/pointers/{file}"
+    with open(path) as f:
+        lines = f.read().split("\n")  # readlines was keeping the \n
+
+    # Locate the marker BEFORE reopening with "w": that mode truncates the file,
+    # so a missing marker used to empty the module and only then raise ValueError.
+    try:
+        index = lines.index("# autogenerated")
+    except ValueError:
+        index = lines.index("# autogenerated ")  # in case there's trailing whitespace
+
+    with open(path, "w") as f:
+        f.write("\n".join(lines[: index + 1]) + f"\n{text}")
+
+
+def _get_type(ctype: str, *, add_pointer: bool = False) -> str | None:
+    """Translate a C type spelling into the source text of its ctypes type.
+
+    `add_pointer` wraps the result in one extra `ctypes.POINTER(...)` level.
+    Returns None when neither the type nor its pointee is listed in `C_TYPES`.
+    """
+    known = C_TYPES.get(ctype)
+
+    if known:
+        # exact hit: only the optional extra pointer level is left to add
+        return f"ctypes.POINTER({known})" if add_pointer else f"{known}"
+
+    if not ctype.endswith("*"):
+        return None
+
+    # split "T**" into the base "T" and its number of pointer levels
+    star = ctype.index("*")
+    base = ctype[:star]
+    depth = ctype[star:].count("*") + add_pointer
+
+    resolved = C_TYPES.get(f"{base}*")
+    if resolved:
+        depth -= 1  # the "T*" entry already covers one pointer level
+    else:
+        resolved = C_TYPES.get(base)
+
+    if not resolved:
+        return None
+
+    return "ctypes.POINTER(" * depth + resolved + ")" * depth
+
+
+async def _gen_str(  # build the "_register(...)" source line for one C signature; None when the signature is skipped
+    name: str | None,
+    signature: str,
+    params: dict[str, list[str]],
+    minver: str | None,
+) -> str | None:
+    signature = signature.replace(" *", "* ").replace("* *", "** ").replace("struct ", "")
+    # preprocessor lines, comments, typedefs and static declarations are not turned into bindings
+    for i in {"#", "//", "typedef", "static", "/*"}:
+        if signature.startswith(i):
+            return None
+    match = C_FUNC.match(signature)
+    # without an explicit name (header scan), use the one found in the prototype
+    if not name:
+        if match:
+            name = match.group(6)
+    # only the first signature seen for a name is registered; later duplicates return None
+    if match and (name not in params):
+        assert name
+        params[name] = []
+        group = match.group(1)
+        ret = _get_type(group)
+
+        if not ret:
+            not_found(group, name)
+            return None
+        # group 12 is a trailing ", ...": variadic functions are registered without argtypes
+        if match.group(12):
+            argtypes = ""
+        else:
+            args = match.group(7)
+            if not args:
+                args = "void"
+            argtypes = ", ("
+
+            if args != "void":
+                for arg in args.split(", "):
+                    arg_split = arg.split(" ")
+                    argname = arg_split.pop(-1)
+                    add_pointer: bool = False
+                    # "T name[]" is registered as a pointer to T
+                    if argname.endswith("[]"):
+                        argname = argname[:-2]
+                        add_pointer = True
+                    # "def" is a Python keyword, so that parameter is recorded as "df"
+                    params[name].append(argname if argname != "def" else "df")
+
+                    join = " ".join(arg_split).replace(
+                        "const ", ""
+                    )  # we don't care about consts
+                    typ = _get_type(join, add_pointer=add_pointer)
+
+                    if not typ:
+                        not_found(join, name)
+                        continue  # NOTE(review): the name was already appended above, so params and argtypes can get out of step
+
+                    argtypes += typ + ","
+
+            argtypes += ")"
+
+        return f"# {signature}\n_register('{name}', {ret}{argtypes}{f', minver={DOUBLE_QUOTE}{minver}{DOUBLE_QUOTE},' if minver else ''})\n"
+    return None  # to make mypy happy
+
+
+async def _gen_ct_bindings() -> dict[str, list[str]]:
+    """Write `_register(...)` calls scraped from the docs and local headers to `_pyapi.py`; return parameter names per function."""
+    params: dict[str, list[str]] = {}
+    out: str = "\n\n"
+    async with aiohttp.ClientSession() as s:
+        async with s.get(f"{BASE_URL}/stable.html#stable-application-binary-interface") as resp:
+            soup = BeautifulSoup(await resp.text(), features="html.parser")
+            ul = soup.find("ul", attrs={"class": "simple"})
+            assert ul
+
+            for tag in ul:
+                if not isinstance(tag, Tag):
+                    continue
+
+                p = tag.find("p", recursive=True)
+                assert p
+                a = p.find("a")
+
+                if a:
+                    assert type(a) is Tag
+                    name: str = a.get_text().replace("()", "")
+                    href = a.attrs["href"]
+                    path = href[: href.find(".html")]  # page name without ".html" and anything after it
+                    # each docs page is downloaded and parsed once, then reused from PAGES
+                    if path not in PAGES:
+                        print("Loading page... ", path)
+                        PAGES[path] = BeautifulSoup(
+                            requests.get(f"{BASE_URL}/{path}.html").text,  # NOTE(review): blocking call inside a coroutine
+                            features="html.parser",
+                        )
+
+                    page = PAGES[path]
+                    signature: str = ""
+                    doc = page.find(id=f"c.{name}")
+                    assert doc, f"{page} {name}"
+                    # concatenate the declaration's text into a single-line signature
+                    for tg in doc:
+                        if isinstance(tg, str):
+                            signature += tg if tg != "\n" else ""
+                            continue
+
+                        text: str = tg.get_text()
+                        if text != "¶":  # the pilcrow link text is not part of the signature
+                            signature += text
+
+                    assert type(doc) is Tag
+                    parent = doc.parent
+                    assert parent
+
+                    minver_soup = parent.find(
+                        "span",
+                        attrs={"class": "versionmodified added"},
+                        recursive=True,
+                    )
+                    minver: str | None = None
+                    # the version is taken as the last word of the note, minus its final character
+                    if minver_soup:
+                        minver = minver_soup.get_text()[:-1].split(" ")[-1]
+                        # this is super janky
+
+                    result = await _gen_str(
+                        name,
+                        signature,
+                        params,
+                        minver,
+                    )
+
+                    if result:
+                        out += result
+
+    include = sysconfig.get_path("include")
+
+    print(f"Reading signatures from {include}")
+    for root, _, files in os.walk(include):
+        for i in files:
+            path = os.path.join(root, i)
+            if os.path.isdir(path):
+                continue
+
+            async with aiofiles.open(path) as f:
+                print("Loading file... ", path)
+                # collapse the file to one line, remove comments, then split it into statements on ";"
+                lines = COMMENT.sub("", (await f.read()).replace("\n", "").replace("  ", "").replace("  ", "")).split(";")
+
+                for raw_line in lines:
+                    if "PyAPI_FUNC" not in raw_line:
+                        continue
+
+                    split = raw_line.split("PyAPI_FUNC")
+                    line = 'PyAPI_FUNC' + ''.join(split[1:])
+                    line = line.replace(";", "")
+                    # drop the "PyAPI_FUNC(" prefix (11 characters) and the first ")"
+                    idx = line.index(")")
+                    line = line[11:idx] + line[idx + 1:]
+
+                    patched_line = ""
+                    # make sure every comma is followed by a space (_gen_str splits arguments on ", ")
+                    for index, char in enumerate(line):
+                        patched_line += char
+
+                        if char == ",":
+                            with suppress(IndexError):
+                                if line[index + 1] != " ":
+                                    patched_line += " "
+
+                    # attach "*" to the type and tidy the remaining whitespace
+                    patched_line = patched_line.replace(" *", "* ").replace("* *", "** ").replace("  ", " ").replace(" )", ")").replace(" ,", ",")
+                    result = await _gen_str(
+                        None,
+                        patched_line,
+                        params,
+                        None,
+                    )
+
+                    if result:
+                        out += result
+                    else:
+                        print("No result...", patched_line)
+
+    _write_autogen("_pyapi.py", out)
+    return params
+
+
+def map_type(typ: type["ctypes._CData"] | None) -> str:
+    """Return the annotation text used in generated bindings for a ctypes type."""
+    if not typ:
+        return "None"
+    type_name = typ.__name__
+    if not type_name.startswith("LP_"):
+        # plain types are keyed in CT_TYPES without their "c_" prefix
+        key = type_name if type_name == "py_object" else type_name[2:]
+        return CT_TYPES[key]
+
+    # pointer type names start with "LP_"; pointees listed in STRUCT_MAP get a typed pointer
+    pointee = type_name[3:]
+    for struct, mapped in STRUCT_MAP.items():
+        if struct.__name__ == pointee:
+            return f"StructPointer[{mapped.__name__}]"
+    return "PointerLike"
+
+
+def get_converter(data: str, typ: str) -> str:
+    """Wrap the expression `data` in the conversion call that annotation `typ` needs."""
+    wrappers = {
+        "StringLike": "make_string",
+        "CharLike": "make_char",
+        "Format": "make_format",
+        "PyObjectLike": "_deref_maybe",
+    }
+    helper = wrappers.get(typ)
+
+    if helper is None:
+        return data  # every other annotation is passed through unchanged
+
+    return f"{helper}({data})"
+
+
+async def main():  # regenerate _pyapi.py, then (after confirmation on stdin) api_bindings.py
+    params = await _gen_ct_bindings()
+    while True:
+        yn = input("regen api_bindings.py (y/n)? ").lower()
+
+        if yn not in {"y", "n"}:
+            continue
+
+        if yn == "n":
+            return
+        break
+
+    out: str = ""
+    from src.pointers._pyapi import API_FUNCS  # local import: executed after _gen_ct_bindings() rewrote _pyapi.py
+
+    funcs: dict[str, list[str]] = {}  # section (class) name -> generated method sources
+
+    for k, v in API_FUNCS.items():
+        func = v[0]
+
+        if not func:
+            continue
+
+        zip_params = (params[k], func.argtypes)
+
+        if func.argtypes is None:
+            print("No argtypes...", func.__name__)
+            continue
+
+        fparams = [f"{param}: {map_type(typ)}" for param, typ in zip(*zip_params)]
+        restype: type["ctypes._CData"] = func.restype  # type: ignore
+        # the part before the first "_" names the class the method is placed in
+        name_split = k.split("_")
+        section = name_split[0]
+        # names starting with "_" split into an empty first part; use "_<next part>" instead
+        if not section:
+            name_split.pop(0)
+            section = "_" + name_split[0]
+
+        if section not in funcs:
+            funcs[section] = []
+
+        origin_name = "_".join(name_split[1:])
+        name = HARDCODED_NAMES.get(origin_name) or ""
+        # no hardcoded name: derive a snake_case one from the C name
+        if not name:
+            for i in NAME_GROUPS:  # lowercase acronyms as a unit, "_"-separated from their neighbours
+                if i in origin_name:
+                    index = origin_name.index(i)
+                    origin_name = origin_name.replace(
+                        i,
+                        f"{'_' if index else ''}{i.lower()}{'_' if (index + len(i)) != len(origin_name) else ''}",
+                    )
+            # lowercase the rest, putting "_" before each uppercase letter except at position 0
+            for index, i in enumerate(origin_name):
+                lower: str = i.lower()
+
+                if i.isupper():
+                    name += ("_" if index else "") + lower
+                else:
+                    name += lower
+
+            if name in {"or", "and", "import", "not", "is"}:  # these are Python keywords; append "_"
+                name += "_"
+
+        funcs[section].append(
+            f"""
+    # {k}
+    @staticmethod
+    def {name}({', '.join(fparams)}) -> {map_type(restype)}:
+        return api_binding_base(API_FUNCS["{k}"], {', '.join([get_converter(i, map_type(typ)) for i, typ in zip(*zip_params)])})
+"""
+        )
+
+    for k, v in funcs.items():
+        out += f"""class {k}(_CallBase):
+    {TRIPLE_QUOTE}Namespace containing API functions prefixed with `{k}_`{TRIPLE_QUOTE}
+{NEWLINE.join(v)}
+"""
+
+    all_str = "__all__ = ("
+
+    for i in funcs:
+        all_str += f'"{i}",'
+
+    out = all_str + ")\n\n" + out
+
+    _write_autogen("api_bindings.py", out)
+    print("success!")
+
+
+if __name__ == "__main__":  # run the generator only when executed as a script
+    asyncio.run(main())