-
Notifications
You must be signed in to change notification settings - Fork 0
/
sharepoint_client.py
125 lines (119 loc) · 5.75 KB
/
sharepoint_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import requests
import time
from requests.exceptions import RequestException
from requests_ntlm import HttpNtlmAuth
from configuration import Configuration
from sharepoint_utils import print_and_log
class SharePoint:
    """Client that issues NTLM-authenticated GET requests to a SharePoint server.

    Settings are loaded through ``Configuration`` from
    ``sharepoint_connector_config.yml``.
    """

    # Number of entries requested per page; also the step applied to ``$skip``.
    PAGE_SIZE = 5000
    # Object types paged with explicit ``$skip``/``$top`` offsets.
    OFFSET_PAGED = ("sites", "lists")
    # Object types paged by following the ``__next`` link of each response.
    LINK_PAGED = ("list_items", "drive_items")

    def __init__(self, logger):
        """ Loads the connector configuration and caches the connection settings
            :param logger: logger handed to the configuration loader and used for error reporting
        """
        self.logger = logger
        configuration = Configuration(
            file_name="sharepoint_connector_config.yml", logger=logger
        )
        self.configs = configuration.configurations
        self.retry_count = int(self.configs.get("retry_count"))
        self.domain = self.configs.get("sharepoint.domain")
        self.username = self.configs.get("sharepoint.username")
        self.password = self.configs.get("sharepoint.password")

    @classmethod
    def _build_page_url(cls, rel_url, query, param_name, skip, top, next_link=None):
        """ Builds the url of the next page to request
            :param rel_url: url of the endpoint the query is appended to
            :param query: query for passing arguments to the url
            :param param_name: object type being fetched
            :param skip: number of entries already requested (offset paging only)
            :param top: page size
            :param next_link: ``__next`` value of the previous page, if any
            Returns:
                url to request
        """
        if param_name in cls.OFFSET_PAGED:
            return rel_url + query + f"&$skip={skip}&$top={top}"
        if param_name in cls.LINK_PAGED:
            if not next_link:
                # First page: only the page size is added to the caller's query.
                return rel_url + query + f"&$top={top}"
            if next_link.lower().startswith(("http://", "https://")):
                # An absolute continuation link must be used as-is; prefixing it
                # with rel_url would produce a malformed url.
                return next_link
            return rel_url + next_link
        # Every other object type is fetched with the caller's query untouched.
        return rel_url + query

    def get(self, rel_url, query, param_name):
        """ Invokes a GET call to the Sharepoint server
            :param rel_url: relative url to the sharepoint farm
            :param query: query for passing arguments to the url
            :param param_name: parameter name whether it is sites, lists, list_items, drive_items, permissions or deindex
            Returns:
                For sites, lists, list_items and drive_items: a dictionary of the
                form {"d": {"results": [...]}} holding the entries of all pages.
                For any other param_name: the response of the GET call.
                The response itself is also returned on a 4xx status or when the
                retries are exhausted on other non-200 statuses; False is
                returned when the last attempt raised a RequestException.
        """
        request_headers = {
            "accept": "application/json;odata=verbose",
            "content-type": "application/json;odata=verbose"
        }
        # The credentials do not change between attempts, so build them once.
        auth = HttpNtlmAuth(self.domain + "\\" + self.username, self.password)
        # Guarantees at least one attempt even if retry_count is configured negative.
        max_retries = max(self.retry_count, 0)
        top = self.PAGE_SIZE
        collected = {"d": {"results": []}}
        skip = 0
        next_link = None
        response = None

        while True:
            url = self._build_page_url(rel_url, query, param_name, skip, top, next_link)
            skip += top
            page_fetched = False
            has_more = False
            retry = 0
            while retry <= max_retries:
                try:
                    response = requests.get(url, auth=auth, headers=request_headers)
                    status = response.status_code
                    if status == requests.codes.ok:
                        if param_name not in self.OFFSET_PAGED + self.LINK_PAGED:
                            return response
                        payload = response.json().get("d", {})
                        # A missing "results" entry is treated as an empty page.
                        page_results = payload.get("results") or []
                        collected["d"]["results"].extend(page_results)
                        if param_name in self.OFFSET_PAGED:
                            # A short page means the collection is exhausted.
                            has_more = len(page_results) >= top
                        else:
                            next_link = payload.get("__next")
                            has_more = bool(next_link)
                        page_fetched = True
                        break
                    if 400 <= status < 500:
                        # A 404 while deindexing is expected and therefore not logged.
                        if not (param_name == 'deindex' and status == 404):
                            print_and_log(
                                self.logger,
                                "exception",
                                "Error: %s. Error while fetching from the sharepoint, url: %s."
                                % (response.reason, url)
                            )
                        return response
                    print_and_log(
                        self.logger,
                        "error",
                        "Error while fetching from the sharepoint, url: %s. Retry Count: %s. Error: %s"
                        % (url, retry, response.reason)
                    )
                except RequestException as exception:
                    print_and_log(
                        self.logger,
                        "exception",
                        "Error while fetching from the sharepoint, url: %s. Retry Count: %s. Error: %s"
                        % (url, retry, exception)
                    )
                    if retry >= max_retries:
                        return False
                # This condition is to avoid sleeping for the last time
                if retry < max_retries:
                    time.sleep(2 ** retry)
                retry += 1

            if not page_fetched:
                # Retries exhausted on a non-200 status: hand back the last response.
                return response
            # The pagination state is decided only by the page that was actually
            # fetched, so a transient failure followed by a successful retry does
            # not cut the listing short.
            if not has_more:
                return collected

    def get_query(self, start_time, end_time, param_name):
        """ returns the query for each objects
            :param start_time: start time of the interval for fetching the documents
            :param end_time: end time of the interval for fetching the documents
            :param param_name: object type the query is built for; sites and lists
                filter on LastItemModifiedDate, everything else on Modified
            Returns:
                query: query for each object
        """
        if param_name in ("sites", "lists"):
            return f"?$filter=(LastItemModifiedDate ge datetime'{start_time}') and (LastItemModifiedDate le datetime'{end_time}')"
        modified_filter = f"$filter=(Modified ge datetime'{start_time}') and (Modified le datetime'{end_time}')"
        # list_items starts the query string; other object types append to an existing one.
        separator = "?" if param_name == "list_items" else "&"
        return separator + modified_filter