From 327e2670f0a0e19724b51323181c637bd99d27fb Mon Sep 17 00:00:00 2001 From: Tyler Sengia Date: Tue, 12 Dec 2023 20:35:27 -0500 Subject: [PATCH 1/4] Add test for page that returns 404 --- src/tests.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/tests.rs b/src/tests.rs index d15620d..8a8a6bd 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -87,3 +87,51 @@ async fn test_two_pages() { // Make sure there are two pages in the page map assert_eq!(map.len(), 2); } + + + +#[tokio::test] +async fn test_missing_page() { + let mut server = Server::new(); + + let url = server.url(); + let parsed_url = Url::parse(url.as_str()).unwrap(); + + let mock = server.mock("GET", "/") + .with_status(201) + .with_header("content-type", "text/html") + .with_body("This points to a missing page!") + .create(); + + let missing_page_mock = server.mock("GET", "/page2.html") + .with_status(404) + .create(); + + let mut spider_crab = SpiderCrab::new(&[url.as_str()]); + + let success = spider_crab.visit_website(url.as_str()).await; + + // Make sure the HTTP request was made to the first page + mock.assert(); + + // Make sure the HTTP request was made to the missing page + missing_page_mock.assert(); + + // Make sure that visit _website() returned true + assert!(!success); + + let graph = &spider_crab.graph; + // Make sure that the page graph contains two pages + assert_eq!(graph.node_count(), 2); + + // Make sure there is only one link in the page graph + assert_eq!(graph.edge_count(), 1); + + let map = &spider_crab.map; + + // Make sure that the page map contains the mock page + assert!(map.contains_key(&parsed_url)); + + // Make sure there are two pages in the page map + assert_eq!(map.len(), 2); +} From b88f59e59ef08af2d237d6603bd84de392734e99 Mon Sep 17 00:00:00 2001 From: Tyler Sengia Date: Tue, 12 Dec 2023 20:35:45 -0500 Subject: [PATCH 2/4] Fix case when page returns HTTP error code --- src/algo.rs | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/src/algo.rs b/src/algo.rs index da0b25d..fc9e219 100644 --- a/src/algo.rs +++ b/src/algo.rs @@ -92,6 +92,11 @@ pub async fn visit_page( // Record the HTTP status code page.status_code = Some(response.status()); + if !response.status().is_success() { + println!("Found bad link! {}", url); + page.good = Some(false); + return false; + } // Attempt to get the Content-Type of the page let (parse_html, content_type) = check_content_type(&response); From d8507fd7631d6f7c40f8239315b932e3cef821b9 Mon Sep 17 00:00:00 2001 From: Tyler Sengia Date: Tue, 12 Dec 2023 20:59:20 -0500 Subject: [PATCH 3/4] Add tests for empty or missing href attributes --- src/tests.rs | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 2 deletions(-) diff --git a/src/tests.rs b/src/tests.rs index 8a8a6bd..cdf07f5 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -89,7 +89,6 @@ async fn test_two_pages() { } - #[tokio::test] async fn test_missing_page() { let mut server = Server::new(); @@ -117,7 +116,7 @@ async fn test_missing_page() { // Make sure the HTTP request was made to the missing page missing_page_mock.assert(); - // Make sure that visit _website() returned true + // Make sure that visit _website() returned false assert!(!success); let graph = &spider_crab.graph; @@ -135,3 +134,131 @@ async fn test_missing_page() { // Make sure there are two pages in the page map assert_eq!(map.len(), 2); } + + +#[tokio::test] +async fn test_missing_href() { + let mut server = Server::new(); + + let url = server.url(); + let parsed_url = Url::parse(url.as_str()).unwrap(); + + let mock = server.mock("GET", "/") + .with_status(201) + .with_header("content-type", "text/html") + .with_body("This link doesn't have an href attribute!") + .create(); + + let mut spider_crab = SpiderCrab::new(&[url.as_str()]); + + let success = spider_crab.visit_website(url.as_str()).await; + + // Make sure the HTTP request was made to the first page + mock.assert(); 
+ + // Make sure that visit_website() returned false + assert!(!success); + + let graph = &spider_crab.graph; + // Make sure that the page graph contains one page + assert_eq!(graph.node_count(), 1); + + // Make sure there are no links in the page graph + assert_eq!(graph.edge_count(), 0); + + let map = &spider_crab.map; + + // Make sure that the page map contains the mock page + assert!(map.contains_key(&parsed_url)); + + // Make sure there is only one page in the page map + assert_eq!(map.len(), 1); +} + + +#[tokio::test] +async fn test_empty_href() { + let mut server = Server::new(); + + let url = server.url(); + let parsed_url = Url::parse(url.as_str()).unwrap(); + + let mock = server.mock("GET", "/") + .with_status(201) + .with_header("content-type", "text/html") + .with_body("This link's href attribute is empty!") + .create(); + + let mut spider_crab = SpiderCrab::new(&[url.as_str()]); + + let success = spider_crab.visit_website(url.as_str()).await; + + // Make sure the HTTP request was made to the first page + mock.assert(); + + // Make sure that visit_website() returned false + assert!(!success); + + let graph = &spider_crab.graph; + // Make sure that the page graph contains one page + assert_eq!(graph.node_count(), 1); + + // Make sure there are no links in the page graph + assert_eq!(graph.edge_count(), 0); + + let map = &spider_crab.map; + + // Make sure that the page map contains the mock page + assert!(map.contains_key(&parsed_url)); + + // Make sure there is only one page in the page map + assert_eq!(map.len(), 1); +} + + +#[tokio::test] +async fn test_empty_href_in_second_page() { + let mut server = Server::new(); + + let url = server.url(); + let parsed_url = Url::parse(url.as_str()).unwrap(); + + let mock = server.mock("GET", "/") + .with_status(201) + .with_header("content-type", "text/html") + .with_body("This is a link to page B.") + .create(); + + let mock_page_b = server.mock("GET", "/pageB.html") + .with_status(201) +
.with_header("content-type", "text/html") + .with_body("This link has an empty href attribute!") + .create(); + + let mut spider_crab = SpiderCrab::new(&[url.as_str()]); + + let success = spider_crab.visit_website(url.as_str()).await; + + // Make sure the HTTP request was made to the first page + mock.assert(); + mock_page_b.assert(); + + // Make sure that visit_website() returned false + assert!(!success); + + let graph = &spider_crab.graph; + // Make sure that the page graph contains two pages + assert_eq!(graph.node_count(), 2); + + // Make sure there is only one link in the graph + assert_eq!(graph.edge_count(), 1); + + let map = &spider_crab.map; + + // Make sure that the page map contains the mock page + assert!(map.contains_key(&parsed_url)); + assert!(map.contains_key(&parsed_url.join("pageB.html").unwrap())); + + // Make sure there are two pages in the page map + assert_eq!(map.len(), 2); +} \ No newline at end of file From 2c4e14ea4d88a9bf444d50ce268950b9294f5576 Mon Sep 17 00:00:00 2001 From: Tyler Sengia Date: Tue, 12 Dec 2023 21:18:00 -0500 Subject: [PATCH 4/4] Formatting changes --- src/tests.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/tests.rs b/src/tests.rs index cdf07f5..172c3f1 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -88,7 +88,6 @@ async fn test_two_pages() { assert_eq!(map.len(), 2); } - #[tokio::test] async fn test_missing_page() { let mut server = Server::new(); @@ -102,9 +101,7 @@ async fn test_missing_page() { .with_body("This points to a missing page!") .create(); - let missing_page_mock = server.mock("GET", "/page2.html") - .with_status(404) - .create(); + let missing_page_mock = server.mock("GET", "/page2.html").with_status(404).create(); let mut spider_crab = SpiderCrab::new(&[url.as_str()]); @@ -135,7 +132,6 @@ async fn test_missing_page() { assert_eq!(map.len(), 2); } - #[tokio::test] async fn test_missing_href() { let mut server = Server::new(); @@ -175,7 +171,6 @@ async fn
test_missing_href() { assert_eq!(map.len(), 1); } - #[tokio::test] async fn test_empty_href() { let mut server = Server::new(); @@ -215,7 +210,6 @@ async fn test_empty_href() { assert_eq!(map.len(), 1); } - #[tokio::test] async fn test_empty_href_in_second_page() { let mut server = Server::new(); @@ -261,4 +255,4 @@ async fn test_empty_href_in_second_page() { // Make sure there are two pages in the page map assert_eq!(map.len(), 2); -} \ No newline at end of file +}