Skip to content

Commit

Permalink
feat: Handle relative links properly (#20)
Browse files Browse the repository at this point in the history
This handles relative links like `../src/my_source_file.py` that previously resulted in dead links once on Confluence.

Relative links are now replaced with GitHub links. If the file has an associated Confluence page, it links to that instead.

This handles neither images (#18) nor unnamed links (#19).
  • Loading branch information
KevinGDialpad authored Jun 3, 2022
1 parent 188c4b4 commit a9fbce4
Show file tree
Hide file tree
Showing 2 changed files with 254 additions and 9 deletions.
195 changes: 195 additions & 0 deletions tests/test_relative_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"""Tests that relative links are updated properly"""
# Pylint seems to be confused by Pytest fixtures
# pylint: disable=redefined-outer-name
import os
import tempfile
from unittest import mock

import pytest

import wiki_sync


GH_ROOT = 'https://root/github/path/'
REPO_NAME = 'GenericRepo'


def setup_module():
    """Set the action inputs that ``wiki_sync`` reads from the environment.

    Pytest runs ``setup_module`` once before the tests of this module.
    """
    os.environ['INPUT_SPACE-NAME'] = 'MySpace'
    os.environ['INPUT_WIKI-BASE-URL'] = 'http://mywiki.atlassian.net'


# Nose-style module-level ``setup`` functions are no longer run by recent
# pytest releases; the old name is kept as an alias for anything that still
# refers to it.
setup = setup_module


@pytest.fixture
def wiki_mock():
    """Fake wiki client whose page lookup returns None by default."""
    # Dotted keyword names configure attributes of child mocks
    return mock.Mock(**{'get_page_by_title.return_value': None})


@pytest.fixture
def get_repo_root_mock():
    """Patch ``wiki_sync.get_repository_root`` for the duration of a test.

    Yields the mock that stands in for the function, so that a test can set
    its ``return_value``.
    """
    # A bare ``mock.patch(...)`` object is inert until it is started. Entering
    # it as a context manager applies the patch and undoes it after the test.
    with mock.patch('wiki_sync.get_repository_root') as repo_root_mock:
        yield repo_root_mock


def test_http_link(wiki_mock, get_repo_root_mock):
    """A link to an HTTP(S) URL keeps its target after the conversion."""
    with tempfile.TemporaryDirectory() as repo_root:
        get_repo_root_mock.return_value = repo_root

        # Write a doc whose only link points to an external URL
        markdown = 'Check out this [link](https://example.org)'
        doc_path = os.path.join(repo_root, 'new_doc.md')
        with open(doc_path, mode='w', encoding='utf-8') as doc_file:
            doc_file.write(markdown + '\n')

        converted = wiki_sync.get_formatted_file_content(
            wiki_mock, doc_path, GH_ROOT, REPO_NAME)

        assert converted == 'Check out this [link|https://example.org]\n'


def test_link_to_file_in_same_folder(wiki_mock, get_repo_root_mock):
    """A link to an existing sibling file becomes GH_ROOT + the file path."""
    with tempfile.TemporaryDirectory() as repo_root:
        get_repo_root_mock.return_value = repo_root

        # The link target must exist on disk
        target_name = 'linked_file.py'
        target_path = os.path.join(repo_root, target_name)
        write_something_to_file(target_path)

        # The doc sits next to the file it links to
        markdown = f'Check out this [other file]({target_name})'
        doc_path = os.path.join(repo_root, 'new_doc.md')
        with open(doc_path, mode='w', encoding='utf-8') as doc_file:
            doc_file.write(markdown + '\n')

        converted = wiki_sync.get_formatted_file_content(
            wiki_mock, doc_path, GH_ROOT, REPO_NAME)

        assert converted == (
            f'Check out this [other file|{GH_ROOT}{target_path}]\n')


def test_link_to_file_in_child_folder(wiki_mock, get_repo_root_mock):
    """A link into a subfolder becomes GH_ROOT + the linked file's path."""
    with tempfile.TemporaryDirectory() as repo_root:
        get_repo_root_mock.return_value = repo_root

        # The link target lives two folders below the doc
        os.makedirs(os.path.join(repo_root, 'foo', 'bar'))
        target_name = 'foo/bar/linked_file.py'
        target_path = os.path.join(repo_root, target_name)
        write_something_to_file(target_path)

        # The doc itself is at the top of the temporary repository
        markdown = f'Check out this [other file]({target_name})'
        doc_path = os.path.join(repo_root, 'new_doc.md')
        with open(doc_path, mode='w', encoding='utf-8') as doc_file:
            doc_file.write(markdown + '\n')

        converted = wiki_sync.get_formatted_file_content(
            wiki_mock, doc_path, GH_ROOT, REPO_NAME)

        assert converted == (
            f'Check out this [other file|{GH_ROOT}{target_path}]\n')


def test_link_to_file_in_parent_folder(wiki_mock, get_repo_root_mock):
    """A ``../..`` link becomes GH_ROOT + the normalized target path."""
    with tempfile.TemporaryDirectory() as repo_root:
        get_repo_root_mock.return_value = repo_root

        # The link target is at the top of the temporary repository
        target_path = os.path.join(repo_root, 'linked_file.py')
        write_something_to_file(target_path)

        # The doc lives two folders down and links back up to the target
        os.makedirs(os.path.join(repo_root, 'foo', 'bar'))
        markdown = 'Check out this [other file](../../linked_file.py)'
        doc_path = os.path.join(repo_root, 'foo/bar/new_doc.md')
        with open(doc_path, mode='w', encoding='utf-8') as doc_file:
            doc_file.write(markdown + '\n')

        converted = wiki_sync.get_formatted_file_content(
            wiki_mock, doc_path, GH_ROOT, REPO_NAME)

        assert converted == (
            f'Check out this [other file|{GH_ROOT}{target_path}]\n')


@pytest.mark.xfail(reason='Will be implemented in #19')
def test_simplified_link(wiki_mock, get_repo_root_mock):
    """A link whose name equals its target should still be rewritten.

    Marked as an expected failure until #19 is implemented.
    """
    # Link where the name of the link is the same as the link itself
    with tempfile.TemporaryDirectory() as repo_root:
        get_repo_root_mock.return_value = repo_root

        # Create a file that the doc will link to
        linked_file_name = 'linked_file.py'
        linked_doc_path = os.path.join(repo_root, linked_file_name)
        write_something_to_file(linked_doc_path)

        # Create the doc file with a link to the other one
        doc_path = os.path.join(repo_root, 'new_doc.md')
        with open(doc_path, mode='w', encoding='utf-8') as doc_file:
            contents = f'Check out [{linked_file_name}]({linked_file_name})'
            print(contents, file=doc_file)

        output = wiki_sync.get_formatted_file_content(
            wiki_mock, doc_path, GH_ROOT, REPO_NAME)

        # The expected link needs its closing bracket; without it this test
        # could never pass, even once the feature exists
        expected_link = f'[{linked_file_name}|{GH_ROOT}{linked_doc_path}]'
        assert output == f'Check out {expected_link}\n'


def test_link_to_non_existing_file(wiki_mock, get_repo_root_mock):
    """A link whose target is not on disk keeps its original target."""
    with tempfile.TemporaryDirectory() as repo_root:
        get_repo_root_mock.return_value = repo_root

        # Nothing named non_existing.py is ever created in the repository
        markdown = 'Check out this [other file](non_existing.py)'
        doc_path = os.path.join(repo_root, 'new_doc.md')
        with open(doc_path, mode='w', encoding='utf-8') as doc_file:
            doc_file.write(markdown + '\n')

        converted = wiki_sync.get_formatted_file_content(
            wiki_mock, doc_path, GH_ROOT, REPO_NAME)

        assert converted == 'Check out this [other file|non_existing.py]\n'


def test_link_to_file_that_exists_on_confluence(wiki_mock, get_repo_root_mock):
    """A link to a file that has a Confluence page points to that page."""
    wiki_env = {
        'INPUT_SPACE-NAME': 'MySpace',
        'INPUT_WIKI-BASE-URL': 'http://mywiki.atlassian.net',
    }

    # patch.dict restores os.environ when the block exits, so the settings
    # this test relies on neither leak into other tests nor depend on them
    with mock.patch.dict(os.environ, wiki_env), \
            tempfile.TemporaryDirectory() as repo_root:
        get_repo_root_mock.return_value = repo_root

        # Create a file that the doc will link to
        linked_file_name = 'linked_file.py'
        linked_doc_path = os.path.join(repo_root, linked_file_name)
        write_something_to_file(linked_doc_path)

        # When the wiki client wants to know whether the linked file has an
        # existing Confluence page, say yes
        wiki_mock.get_page_by_title.return_value = {
            '_links': {
                'webui': '/spaces/SPACE/pages/123'
            }
        }

        # Create the doc file with a link to the other one
        doc_path = os.path.join(repo_root, 'new_doc.md')
        with open(doc_path, mode='w', encoding='utf-8') as doc_file:
            contents = f'Check out this [other file]({linked_file_name})'
            print(contents, file=doc_file)

        output = wiki_sync.get_formatted_file_content(
            wiki_mock, doc_path, GH_ROOT, REPO_NAME)

        wiki_link = 'http://mywiki.atlassian.net/wiki/spaces/SPACE/pages/123'
        expected_output = f'Check out this [other file|{wiki_link}]\n'
        assert output == expected_output


def write_something_to_file(file_path: str) -> None:
    """Create (or overwrite) ``file_path`` with one line of filler text."""
    placeholder = 'Not important - file only needs to exist'
    with open(file_path, mode='w', encoding='utf-8') as doc_file:
        doc_file.write(placeholder + '\n')
68 changes: 59 additions & 9 deletions wiki_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,22 @@
"""
import logging
import os
import re
import subprocess
import sys
from typing import List
from typing import Dict, List

import atlassian
import pypandoc


# The format of a link in JIRA markdown is [link name|link]
# We only need a capture group for the link itself
# Negated character classes (rather than a greedy `.*`) keep each match inside
# one pair of brackets, so several links on the same line are all found
# TODO handle links like [link], which happen when the link name is the same as
# the link itself
JIRA_LINK_PATTERN = re.compile(r'\[[^\]|\n]*\|([^\]\n]*)\]')


def get_files_to_sync(changed_files: str) -> List[str]:
    """
    Splits changed_files on whitespace and returns, in their original order,
    the entries that should_sync_file accepts.
    """
    candidates = changed_files.split()
    return list(filter(should_sync_file, candidates))

Expand Down Expand Up @@ -66,8 +74,7 @@ def sync_files(files: List[str]) -> bool:
read_only_warning = (
'{info:title=Imported content|icon=true}'
f'This content has been imported from the {repo_name} repository.'
"\nRelative links don't yet work as expected.\n"
'You can find (and modify) the original at'
'\nYou can find (and modify) the original at'
f' {url_root_for_file + file_path}.{{info}}\n'
'{warning:title=Do not update this page directly|icon=true}'
'Your modifications would be lost the next time the source file'
Expand All @@ -83,12 +90,10 @@ def sync_files(files: List[str]) -> bool:
continue

try:
# TODO detect and update relative links so they point to the
# corresponding JIRA page if it exists, or the GitHub file
jira_file_contents = pypandoc.convert_file(absolute_file_path,
'jira')

content = read_only_warning + jira_file_contents
formatted_content = get_formatted_file_content(
wiki_client, absolute_file_path, url_root_for_file,
repo_name)
content = read_only_warning + formatted_content
except Exception:
logging.exception('Error converting file %s:', absolute_file_path)
had_errors = True
Expand All @@ -105,6 +110,51 @@ def sync_files(files: List[str]) -> bool:
return had_errors


def get_formatted_file_content(wiki_client: atlassian.Confluence,
                               file_path: str, gh_root: str, repo_name: str
                               ) -> str:
    """
    Takes the absolute path of a file and returns its contents formatted as
    JIRA markdown.
    Updates relative links to point to a Confluence page if it exists, or to a
    GitHub page.

    wiki_client is asked for a page titled '{repo_name}/{target path}' in the
    space named by the INPUT_SPACE-NAME environment variable. When a page is
    found, the link becomes INPUT_WIKI-BASE-URL + '/wiki' + the page's 'webui'
    link; otherwise it becomes gh_root + the target path.
    Links that start with http:// or https://, and links whose target does not
    exist on disk, are left untouched.

    Raises KeyError if a required environment variable is not set.
    """
    formatted_file_contents = pypandoc.convert_file(file_path, 'jira')
    doc_folder = os.path.dirname(file_path)

    # keys are relative links; values are what they should be replaced with
    links_to_replace: Dict[str, str] = {}

    for link in JIRA_LINK_PATTERN.findall(formatted_file_contents):
        # Most links are HTTP - don't waste time with them.
        # Checking the full scheme keeps relative links to files whose name
        # merely starts with "http" eligible for replacement.
        if link.startswith(('http://', 'https://')):
            continue

        # A link used several times only needs to be resolved once
        if link in links_to_replace:
            continue

        target_path = os.path.normpath(os.path.join(doc_folder, link))
        if not os.path.exists(target_path):  # Not actually a relative link
            continue

        # NOTE(review): target_path is derived from file_path, so with an
        # absolute file_path both the page title below and the GitHub link
        # contain an absolute local path - confirm whether a path relative to
        # the repository root is intended here.
        wiki_page_info = wiki_client.get_page_by_title(
            os.environ['INPUT_SPACE-NAME'], f'{repo_name}/{target_path}')
        if wiki_page_info:
            # The link is to a file that has a Confluence page
            # Let's link to the page directly
            links_to_replace[link] = (os.environ['INPUT_WIKI-BASE-URL']
                                      + '/wiki'
                                      + wiki_page_info['_links']['webui'])
        else:
            # No existing Confluence page - link to GitHub
            links_to_replace[link] = gh_root + target_path

    # Replace relative links; matching on '|link]' only touches link targets
    for relative_link, new_link in links_to_replace.items():
        formatted_file_contents = formatted_file_contents.replace(
            f'|{relative_link}]', f'|{new_link}]')

    return formatted_file_contents


def get_repository_root() -> str:
repo_root = ''
with subprocess.Popen(['git', 'rev-parse', '--show-toplevel'],
Expand Down

0 comments on commit a9fbce4

Please sign in to comment.