Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Confluence] added new method get tables from confluence page. #1280

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion atlassian/confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import os
import time
import json

from bs4 import BeautifulSoup
import lxml
from requests import HTTPError
import requests
from deprecated import deprecated
Expand Down Expand Up @@ -356,6 +357,46 @@ def get_page_by_id(self, page_id, expand=None, status=None, version=None):

return response

def get_tables_from_page(self, page_id):
    """
    Scrape the HTML tables embedded in a Confluence page.

    The page body is requested in its ``body.storage`` representation and parsed
    with BeautifulSoup (lxml parser). Every ``<table>`` is converted to a list of
    rows; every row is a list of cell texts, with the row's ``<th>`` cells first
    followed by its ``<td>`` cells.

    :param page_id: integer confluence page_id
    :return: one of
             - a JSON *string* with the keys ``page_id``, ``number_of_tables_in_page``
               and ``tables_content`` (list of tables -> rows -> cell texts)
               when at least one table is found;
             - ``{"No tables found for page: ": page_id}`` when the page has no tables;
             - ``{"Page content is empty"}`` when the page body is empty;
             - ``None`` when an unexpected (non-HTTP) error occurred and was logged.
    :raises ApiError: when the page request fails with HTTP 404.
    :raises HTTPError: when the page request fails with any other HTTP status.
    """
    try:
        page_content = self.get_page_by_id(page_id, expand="body.storage")["body"]["storage"]["value"]

        if not page_content:
            # NOTE(review): this literal is a *set*, not a dict; kept unchanged
            # so existing callers see the same value.
            return {"Page content is empty"}

        soup = BeautifulSoup(page_content, features="lxml")
        # Calling a tag/soup object is shorthand for find_all().
        tables_raw = [
            [[cell.text for cell in row("th") + row("td")] for row in table("tr")]
            for table in soup("table")
        ]

        if not tables_raw:
            return {
                "No tables found for page: ": page_id,
            }

        # Lazy %-style arguments: the logging module formats the message itself.
        log.info("Found %d table(s) for page id %s", len(tables_raw), page_id)
        return json.dumps(
            {
                "page_id": page_id,
                "number_of_tables_in_page": len(tables_raw),
                "tables_content": tables_raw,
            }
        )
    except HTTPError as e:
        if e.response.status_code == 404:
            # Raise ApiError as the documented reason is ambiguous
            log.error("Couldn't retrieve tables from page %s", page_id)
            raise ApiError(
                "There is no content with the given pageid, pageid params is not an integer "
                "or the calling user does not have permission to view the page",
                reason=e,
            )
        # Any other HTTP failure must not be silently turned into ``None``.
        raise
    except Exception:
        # Best-effort: record the failure (with traceback) and return nothing.
        log.exception("Error occurred while retrieving tables from page %s", page_id)
        return None
def get_page_labels(self, page_id, prefix=None, start=None, limit=None):
"""
Returns the list of labels on a piece of Content.
Expand Down
2 changes: 2 additions & 0 deletions docs/confluence.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ Page actions
# Add comment into page
confluence.add_comment(page_id, text)

# Fetch tables from Confluence page
confluence.get_tables_from_page(page_id)
Template actions
----------------

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ six
oauthlib
requests_oauthlib
requests-kerberos==0.14.0
lxml
beautifulsoup4
Loading