diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..f9cf2e6 --- /dev/null +++ b/tests.py @@ -0,0 +1,122 @@ +import unittest +from v2_utils import remove_unmatched_tags +from app import app +import json,random + + +class CustomTestResult(unittest.TextTestResult): + def addSuccess(self, test): + super().addSuccess(test) + print(f"{test._testMethodName} - passed") + + +class CustomTestRunner(unittest.TextTestRunner): + resultclass = CustomTestResult + + +class TestRemoveUnmatchedTags(unittest.TestCase): + """ + Static test case input & output for check markdown handler function + """ + def test_remove_unmatched_tags_basic(self): + input_text = "
Test content

" + expected_output = "
Test content
" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_opening(self): + input_text = "
Test content" + expected_output = "
Test content
" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_closing(self): + input_text = "

Test content

" + expected_output = "

Test content

" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_nested_tags(self): + input_text = "

Test content

" + expected_output = "

Test content

" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_nested_opening(self): + input_text = "

Test content

" + expected_output = "

Test content

" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_nested_closing(self): + input_text = "
Test content

" + expected_output = "
Test content
" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_multiple_unmatched_tags(self): + input_text = "
Test

Content

Here" + expected_output = "
Test

Content

Here" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_text_with_no_tags(self): + input_text = "Plain text with no tags" + expected_output = "Plain text with no tags" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_empty_string(self): + input_text = "" + expected_output = "" + self.assertEqual(len(remove_unmatched_tags(input_text)),len(expected_output)) + + +class TestIssuesEndpoints(unittest.TestCase): + + def setUp(self): + self.app = app.test_client() + self.app.testing = True + self.issues_data = None # To store issues data for use in subsequent tests + + # Fetch issues data during setup + self._fetch_issues_data() + + def _fetch_issues_data(self): + # Validate the /issues endpoint and store the issues data + response = self.app.get('/issues') + self.assertEqual(response.status_code, 200) + + data = json.loads(response.data) + self.issues_data = data.get('issues', []) + self.assertTrue(len(self.issues_data) > 0, "No issues found in response") + + def test_get_issues_success(self): + # Check if issues data is correctly fetched + self.assertIsNotNone(self.issues_data, "Issues data is not populated") + + def test_get_issues_detail_success(self): + # Ensure the /issues endpoint was successfully called and issues data is available + if not self.issues_data: + self.skipTest("Skipping detail test as /issues endpoint did not return data") + + # Use first data from /issues response to form the endpoint URL + + index = random.randrange(1,len(self.issues_data)-1) + sample_issue = self.issues_data[index]['issues'][0] + issue_id = sample_issue['id'] + orgname = self.issues_data[index]['org_name'] + + endpoint = f'/v2/issues/{orgname}/{issue_id}' + + response = self.app.get(endpoint) + self.assertEqual(response.status_code, 200) + + def test_get_repo_detail_success(self): + # Ensure the /issues endpoint was successfully called and issues data is available + if not self.issues_data: + self.skipTest("Skipping detail test as /issues endpoint did not return data") + + # Use first data from /issues response to form the endpoint URL + index = random.randrange(1,len(self.issues_data)-1) + orgname = self.issues_data[index]['org_name'] + endpoint = f'/issues/{orgname}' + response = self.app.get(endpoint) + self.assertEqual(response.status_code, 200) + + + +if __name__ == '__main__': + unittest.main(testRunner=CustomTestRunner()) diff --git a/v2_utils.py b/v2_utils.py index 5e97cd5..5a34a64 100644 --- a/v2_utils.py +++ b/v2_utils.py @@ -27,43 +27,103 @@ def define_link_data(usernames): logging.info(f"{e}---define_link_data") return [] +def preprocess_nested_tags(text): + try: + segments = re.split(r'(<[^>]+>)', text) + tag_stack = [] + corrected_segments = [] + + for segment in segments: + if re.match(r'<[^/][^>]*>', segment): # Opening tag + tag_stack.append(segment) + corrected_segments.append(segment) + elif re.match(r']+>', segment): # Closing tag + if tag_stack and tag_stack[-1][1:].split()[0] == segment[2:].split()[0]: + tag_stack.pop() + corrected_segments.append(segment) + else: + continue # Ignore unmatched closing tag + else: + corrected_segments.append(segment) + + while tag_stack: + open_tag = tag_stack.pop() + tag_name = re.match(r'<([^ ]+)', open_tag).group(1) + corrected_segments.append(f'') + + return ''.join(corrected_segments) + + except Exception as e: + print(e,"error in preprocess_nested_tags function") + return text + + + def remove_unmatched_tags(text): try: - # Remove unmatched closing tags at the beginning of the string - text = re.sub(r'^\s*]+>\s*', '', text) + # Preprocess text to handle unmatched nested tags + text = preprocess_nested_tags(text) + # Remove unmatched closing tags at the beginning of the string + text = re.sub(r'^\s*]+>\s*', '', text) # Regex pattern to find matched or unmatched tags - pattern = re.compile(r'(<([^>]+)>.*?)|(<[^/][^>]*>.*)', re.DOTALL) + pattern = re.compile(r'(<([^>]+)>.*?)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL) matches = pattern.findall(text) - + + #If get text without html tags + if matches == []: + return text + cleaned_text = '' + open_tags = [] + for match in matches: if match[0]: # Full matched ... pairs cleaned_text += match[0] elif match[2]: # Unmatched opening tags + # Add the tag to the list of open tags + tag = re.match(r'<([^/][^>]*)>', match[2]) + if tag: + tag_name = tag.group(1).split()[0] + open_tags.append(tag_name) cleaned_text += match[2] + + # Close any unmatched opening tags + while open_tags: + tag = open_tags.pop() + cleaned_text += f'' + + # Remove extra unmatched angle brackets + cleaned_text = re.sub(r'>+', '>', cleaned_text) + cleaned_text = re.sub(r'<+', '<', cleaned_text) + #For front end renders add ul tags + if not cleaned_text.strip().startswith("
    "): + return f"
      {cleaned_text}
    " + return cleaned_text + except Exception as e: print(e) return text - - + + + def week_data_formatter(html_content, type): try: # Use regex to find week titles (e.g., Week 1, Week 2) and their corresponding task lists - week_matches = re.findall(r'(Week \d+)', html_content) - tasks_per_week = re.split(r'Week \d+', html_content)[1:] # Split the content by weeks and skip the first empty split + week_matches = re.findall(r'Week\s*-?\s*\d+', html_content) + tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:] # Split the content by weeks and skip the first empty split weekly_updates = [] if type == "Learnings": # tasks_per_week = re.split(r'

    Week \d+

    ', html_content)[1:] - tasks_per_week = re.split(r'(<.*?>Week \d+<.*?>)', html_content)[1:] - tasks_per_week = [tasks_per_week[i] for i in range(1, len(tasks_per_week), 2)] + tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:] + tasks_per_week = [tasks_per_week[i] for i in range(0, len(tasks_per_week))] for i, week in enumerate(week_matches): task_list_html = tasks_per_week[i] if i < len(tasks_per_week) else "" weekly_updates.append({