evennia/docs/pylib/auto_link_remapper.py

279 lines
9.3 KiB
Python
Raw Normal View History

"""
Build a TOC-tree; Sphinx requires it and this makes it easy to just
add/build/link new files without needing to explicitly add it to a toctree
directive somewhere.
"""
2020-06-17 23:05:22 +02:00
import re
from collections import defaultdict
from sphinx.errors import DocumentError
2020-06-17 23:05:22 +02:00
from pathlib import Path
from os.path import abspath, dirname, join as pathjoin, relpath
# file names (including the .md extension) that are skipped when the
# link maps are built
_IGNORE_FILES = []

# the markdown sources live in <parent of this file's directory>/source
_SOURCEDIR_NAME = "source"
_SOURCE_DIR = pathjoin(dirname(dirname(abspath(__file__))), _SOURCEDIR_NAME)
_TOC_FILE = pathjoin(_SOURCE_DIR, "toc.md")

# urls starting with any of these are written back without being
# re-pointed at a doc file
_NO_REMAP_STARTSWITH = [
    "http://",
    "https://",
    "github:",
    "feature-request",
    "report-bug",
    "issue",
    "bug-report",
]

# remove these prefixes from the url
_STRIP_PREFIX = [
    "../../api/",
    "../api/",
    "./api/",
    "api/",
    "api:",
]

# exact link text -> replacement link text
TXT_REMAPS = {
    "Developer Central": "Evennia Components overview",
    "Getting Started": "Setup Quickstart",
}

# exact link url -> replacement url; applied before prefix stripping and
# before the url is matched against the existing doc files
URL_REMAPS = {
    "Developer-Central": "Components/Components-Overview",
    "Tutorials": "Howtos/Howtos-Overview",
    "../Howtos/Beginner-Tutorial/Directory-Overview": "Gamedir-Overview",
    "Howtos/Beginner-Tutorial/Directory-Overview": "Gamedir-Overview",
    "Beginner-Tutorial/Directory-Overview": "Gamedir-Overview",
    "Directory-Overview": "Gamedir-Overview",
    "../Setup/Getting-Started": "Setup-Quickstart",
    "Setup/Getting-Started": "Setup-Quickstart",
    "Getting-Started": "Setup-Quickstart",
    "First-Steps-Coding": "Beginner-Tutorial-Part1",
    "../Howtos/Beginner-Tutorial/Adding-Command-Tutorial": "Adding-Commands",
    "Howtos/Beginner-Tutorial/Adding-Command-Tutorial": "Adding-Commands",
    "Beginner-Tutorial/Adding-Command-Tutorial": "Adding-Commands",
    "Adding-Command-Tutorial": "Adding-Commands",
    "CmdSet": "Command-Sets",
    "Spawner": "Prototypes",
    "issue": "github:issue",
    "issues": "github:issue",
    "bug": "github:issue",
    "bug-report": "github:issue",
    "./Default-Command-Help": "api:evennia.commands.default#modules",
    "../Components/Default-Command-Help": "api:evennia.commands.default#modules",
    "../../../Components/Default-Command-Help": "api:evennia.commands.default#modules",
    "./Locks.md#permissions": "Permissions",
    "modules": "Default-Commands.md",
}

# doc base name -> url that referenced it; filled while links are rewritten
# and read afterwards to report docs nothing links to
_USED_REFS = {}

# posix path of the file currently being rewritten (None outside a run)
_CURRFILE = None
2020-07-27 21:09:13 +02:00
def auto_link_remapper(no_autodoc=False):
    """
    Rewrite the markdown links of every doc file to match the file layout.

    - Auto-Remaps links to fit with the actual document file structure. Requires
      all doc files to have a unique name.
    - Reports docs outside api/ that no processed link referred to.

    Writing of source/toc.md is currently disabled; the code for it is kept,
    commented out, at the end of this function.

    Args:
        no_autodoc (bool): Only referenced by the disabled toc-writing code,
            so it currently has no effect.

    Raises:
        DocumentError: If two doc files share the same base name.

    """
    global _CURRFILE

    print(" -- Auto-Remapper starting.")

    def _get_rel_source_ref(path):
        """Get the path relative the source/ dir, without the file extension."""
        # resolve against the known source dir instead of searching the path
        # for a component named "source": that search broke on deep nesting
        # and on parent directories that are themselves called "source"
        url = Path(path).relative_to(_SOURCE_DIR).as_posix()
        # get the reference, without .md
        return url.rsplit(".", 1)[0]

    # walk the tree once; the same file list serves every pass below
    md_paths = list(Path(_SOURCE_DIR).rglob("*.md"))

    toc_map = {}  # doc base name -> path relative to source/, no extension
    docref_map = defaultdict(dict)  # source file -> {target base name: relative url}

    for path in md_paths:
        if path.name in _IGNORE_FILES:
            # this is the name including .md
            continue

        sourcepath = path.as_posix()
        # get name and url relative to source/
        fname = path.name.rsplit(".", 1)[0]
        src_url = _get_rel_source_ref(sourcepath)

        # check for duplicate files
        if fname in toc_map:
            duplicate_src_url = toc_map[fname]
            raise DocumentError(
                f" Tried to add {src_url}.md, but a file {duplicate_src_url}.md already exists.\n"
                " Evennia's auto-link-corrector does not accept doc-files with the same \n"
                " name, even in different folders. Rename one.\n"
            )
        toc_map[fname] = src_url

        # find relative links to all other files
        sourcedir = dirname(sourcepath)
        for targetpath in md_paths:
            targetname = targetpath.name.rsplit(".", 1)[0]
            url = relpath(targetpath.as_posix(), sourcedir)
            if "/" not in url:
                # need to be explicit or there will be link ref collisions between
                # e.g. TickerHandler page and TickerHandle api node
                url = "./" + url
            docref_map[sourcepath][targetname] = url.rsplit(".", 1)[0]

    # normal reference-links [txt](urls)
    ref_regex = re.compile(
        r"\[(?P<txt>[\n\w -\[\]\`]+?)\]\((?P<url>.+?)\)", re.I | re.S | re.U | re.M
    )
    # in document references
    ref_doc_regex = re.compile(
        r"\[(?P<txt>[\n\w -\`]+?)\]:\s+?(?P<url>.+?)(?=$|\n)", re.I | re.S | re.U | re.M
    )

    no_remap_prefixes = tuple(_NO_REMAP_STARTSWITH)

    def _remap(txt, url):
        """Apply the static text/url remaps, then strip the api prefixes."""
        txt = TXT_REMAPS.get(txt, txt)
        url = URL_REMAPS.get(url, url)
        # deliberately no break: a later prefix may match what is left after
        # an earlier one was removed
        for strip_prefix in _STRIP_PREFIX:
            if url.startswith(strip_prefix):
                url = url[len(strip_prefix) :]
        return txt, url

    def _sub(match):
        """Rewrite one inline reference link ``[txt](url)``."""
        txt, url = _remap(match.group("txt"), match.group("url"))

        if url.startswith(no_remap_prefixes):
            # skip regular http/s urls etc
            return f"[{txt}]({url})"

        if url.startswith("evennia."):
            # api link - we want to remove legacy #reference and remove .md
            if "#" in url:
                url = url.rsplit("#", 1)[1]
            if url.endswith(".md"):
                url = url.rsplit(".", 1)[0]
            return f"[{txt}]({url})"

        # last path component, split into base name and optional anchor
        fname, *anchor = url.rsplit("/", 1)[-1].rsplit("#", 1)
        if ".md" in fname:
            fname = fname.rsplit(".", 1)[0]

        if not _CURRFILE.endswith("toc.md"):
            # references from the toc itself do not count against orphans
            _USED_REFS[fname] = url

        if _CURRFILE in docref_map and fname in docref_map[_CURRFILE]:
            urlout = (
                docref_map[_CURRFILE][fname] + ".md" + ("#" + anchor[0].lower() if anchor else "")
            )
            if urlout != url:
                cfilename = _CURRFILE.rsplit("/", 1)[-1]
                print(f" {cfilename}: [{txt}]({url}) -> [{txt}]({urlout})")
        else:
            urlout = url

        return f"[{txt}]({urlout})"

    def _sub_doc(match):
        """Rewrite one reference definition ``[txt]: url`` (bottom of page)."""
        txt, url = _remap(match.group("txt"), match.group("url"))

        if url.startswith(no_remap_prefixes):
            return f"[{txt}]: {url}"

        if "http" in url and "://" in url:
            urlout = url
        elif url.startswith("evennia."):
            # api link - we want to remove legacy #reference; a url without
            # '#' passes through unchanged (it used to leave urlout unbound)
            urlout = url.rsplit("#", 1)[-1]
        else:
            # split the anchor off before dropping the extension, otherwise
            # 'Page.md#anchor' loses its anchor along with the '.md'
            fname, *anchor = url.rsplit("/", 1)[-1].rsplit("#", 1)
            fname = fname.rsplit(".", 1)[0]

            if not _CURRFILE.endswith("toc.md"):
                _USED_REFS[fname] = url

            if _CURRFILE in docref_map and fname in docref_map[_CURRFILE]:
                # NOTE(review): unlike _sub, no ".md" is appended and the
                # anchor is not lowercased here - confirm this is intended
                urlout = docref_map[_CURRFILE][fname] + ("#" + anchor[0] if anchor else "")
                if urlout != url:
                    cfilename = _CURRFILE.rsplit("/", 1)[-1]
                    print(f" {cfilename}: [{txt}]: {url} -> [{txt}]: {urlout}")
            else:
                urlout = url

        return f"[{txt}]: {urlout}"

    # replace / correct links in all files
    count = 0
    for path in sorted(md_paths, key=lambda p: p.name):
        # the substitution callbacks read the current file from this global
        _CURRFILE = path.as_posix()

        # explicit encoding so the result does not depend on the locale
        with open(path, "r", encoding="utf-8") as fil:
            intxt = fil.read()
        outtxt = ref_regex.sub(_sub, intxt)
        outtxt = ref_doc_regex.sub(_sub_doc, outtxt)

        if intxt != outtxt:
            with open(path, "w", encoding="utf-8") as fil:
                fil.write(outtxt)
            count += 1
            print(f" -- Auto-relinked links in {path.name}")

    if count > 0:
        print(f" -- Auto-corrected links in {count} documents.")

    for fname, src_url in sorted(toc_map.items()):
        if fname not in _USED_REFS and not src_url.startswith("api/"):
            print(f" ORPHANED DOC: no refs found to {src_url}.md")

    # write tocfile
    # with open(_TOC_FILE, "w") as fil:
    #     fil.write("```{toctree}\n")
    #     if not no_autodoc:
    #         fil.write("- [API root](api/evennia-api.rst)")
    #     for ref in sorted(toc_map.values()):
    #         if ref == "toc":
    #             continue
    #         # if not "/" in ref:
    #         #     ref = "./" + ref
    #         # linkname = ref.replace("-", " ")
    #         fil.write(f"\n{ref}")  # - [{linkname}]({ref})")
    #     # we add a self-reference so the toc itself is also a part of a toctree
    #     fil.write("\n```\n\n```{toctree}\n :hidden:\n\ntoc\n```")
    # print(" -- File toc.md updated.")

    print(" -- Auto-Remapper finished.")
2020-07-11 10:41:33 +02:00
2020-07-27 21:09:13 +02:00
# allow running the remapper directly as a script
if __name__ == "__main__":
    auto_link_remapper()