diff --git a/src/components/sources/DocumentModal.tsx b/src/components/sources/DocumentModal.tsx new file mode 100644 index 0000000..849cab8 --- /dev/null +++ b/src/components/sources/DocumentModal.tsx @@ -0,0 +1,126 @@ +import React from "react"; +import { + Modal, + ModalOverlay, + ModalContent, + ModalHeader, + ModalBody, + ModalCloseButton, + Text, + Link, + UnorderedList, + ListItem, + Box, + VStack, +} from "@chakra-ui/react"; +/** Props accepted by DocumentModal: open/close control, the document to display (or null), and the loading/error state of the request that produced it. */ +interface DocumentModalProps { + isOpen: boolean; + onClose: () => void; + document: Record | null; + isLoading: boolean; + isError: boolean; + error?: string; +} +/** Converts a primitive to display text: strings are returned as-is, numbers and booleans via toString(), Date instances via toISOString(); every other value becomes an empty string. */ +const formatValue = (value: any): string => { + if (typeof value === "string") { + return value; + } else if (typeof value === "number" || typeof value === "boolean") { + return value.toString(); + } else if (value instanceof Date) { + return value.toISOString(); + } + return ""; +}; +/** Renders one named field. Arrays are mapped item by item (object items take a different branch from primitives, which go through formatValue), non-null objects take a nested branch, and everything else is printed with formatValue. NOTE(review): the JSX markup is not visible in this copy of the patch, so the elements used by each branch are not described here. */ +const RenderField = ({ name, value }: { name: string; value: any }) => { + if (Array.isArray(value)) { + return ( + + {name}: + + {value.map((item, index) => ( + + {typeof item === "object" ? 
( + + ) : ( + formatValue(item) + )} + + ))} + + + ); + } else if (typeof value === "object" && value !== null) { + return ( + + {name}: + + + + + ); + } else { + return ( + + + {name}: + {" "} + {formatValue(value)} + + ); + } +}; +/** Renders the given object by iterating over Object.entries, producing one entry per key/value pair. */ +const RenderObject = ({ object }: { object: Record }) => { + return ( + + {Object.entries(object).map(([key, value]) => ( + + ))} + + ); +}; +/** Modal for a single document. The header shows document.title, falling back to "Document Details". The body shows a loading message while isLoading, an error message (with the error text) while isError, and otherwise the document, preceded by its url when one is present. */ +const DocumentModal: React.FC = ({ + isOpen, + onClose, + document, + isLoading, + isError, + error, +}) => { + return ( + + + + {document?.title || "Document Details"} + + + {isLoading && Loading document content...} + {isError && ( + Error loading document: {error} + )} + {!isLoading && !isError && document && ( + + {document.url && ( + + {document.url} + + )} + + + )} + + + + ); +}; + +export default DocumentModal; diff --git a/src/components/sources/Documents.tsx b/src/components/sources/Documents.tsx new file mode 100644 index 0000000..948bb20 --- /dev/null +++ b/src/components/sources/Documents.tsx @@ -0,0 +1,146 @@ +import React, { useState } from "react"; +import { FaEye } from "react-icons/fa"; + +import { useSourceDocuments } from "@/hooks/useSourceDocuments"; +import { useDocumentContent } from "@/hooks/useDocumentContent"; +import { formatTimeAgo } from "@/utils/dateUtils"; +import DocumentModal from "./DocumentModal"; + +interface SourceDocumentsProps { + domain: string; +} +/** Removes an optional http(s) scheme, an optional www prefix and the domain from the start of url (case-insensitive) and returns the remainder, always beginning with a slash. NOTE(review): as shown, domain.replace(".", ".") replaces the first dot with itself, so dots in the domain reach the RegExp unescaped and match any character; a regex escape was presumably intended - confirm against the real file. */ +const trimUrl = (url: string, domain: string): string => { + const domainPattern = new RegExp( + `^(https?:\/\/)?(www\.)?${domain.replace(".", ".")}/?`, + "i" + ); + const trimmed = url.replace(domainPattern, ""); + return trimmed.startsWith("/") ? 
trimmed : `/${trimmed}`; +}; +/** Paginated document list for one domain. Keeps the current page (starting at 1) and the url of the selected document (starting at null) in state; the page feeds useSourceDocuments and the selected url feeds useDocumentContent. Loading and error states of the list short-circuit rendering. */ +const Documents: React.FC = ({ domain }) => { + const [page, setPage] = useState(1); + const { sourceDocuments, total, isLoading, isError, error } = + useSourceDocuments(domain, page); + const [selectedDocumentUrl, setSelectedDocumentUrl] = useState( + null + ); + const { + documentContent, + isLoading: isContentLoading, + isError: isContentError, + error: contentError, + } = useDocumentContent(selectedDocumentUrl); + + if (isLoading) return
Loading source documents...
; + if (isError) + return
Error loading source documents: {error.message}
; +/* Page count assumes 10 documents per page, the same value as the size constant in the sourceDocuments API handler. */ + const totalPages = Math.ceil(total / 10); +/* Selecting a url is what enables the document-content query and supplies the document for the modal. */ + const handleViewDocument = (url: string) => { + setSelectedDocumentUrl(url); + }; + + return ( +
+

Documents for {domain}

+
+ + + + + + + + + + + {sourceDocuments?.map((doc, index) => ( + + + + + + + ))} + +
+ Title + + URL + + Indexed At +
+
+ {doc.title} +
+
+ + +
+ {formatTimeAgo(doc.indexed_at)} + + {new Date(doc.indexed_at).toLocaleString()} + +
+
+ +
+
+
+ + + Page {page} of {totalPages} + + +
+ setSelectedDocumentUrl(null)} + document={documentContent} + isLoading={isContentLoading} + isError={isContentError} + error={contentError?.message} + /> +
+ ); +}; + +export default Documents; diff --git a/src/hooks/useDocumentContent.ts b/src/hooks/useDocumentContent.ts new file mode 100644 index 0000000..4d0be53 --- /dev/null +++ b/src/hooks/useDocumentContent.ts @@ -0,0 +1,39 @@ +import { useQuery } from "@tanstack/react-query"; + +interface DocumentContent { + title: string; + url: string; + content: string; + indexed_at: string; +} +/** POSTs the url as JSON to /api/elasticSearchProxy/getDocumentContent and returns data.data. Throws an Error carrying data.message when the parsed body does not report success. NOTE(review): the HTTP status is not checked before response.json() is called. */ +const fetchDocumentContent = async (url: string): Promise => { + const response = await fetch("/api/elasticSearchProxy/getDocumentContent", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ url }), + }); + const data = await response.json(); + if (!data.success) throw new Error(data.message); + return data.data; +}; +/** React Query wrapper around fetchDocumentContent, keyed by ["documentContent", url]. The query stays disabled while url is falsy; cacheTime and staleTime are Infinity and refetch on window focus is off. NOTE(review): url is typed as string, but the Documents component passes a state value that starts as null - confirm the intended parameter type. */ +export const useDocumentContent = (url: string) => { + const { data, isLoading, isError, error } = useQuery({ + queryKey: ["documentContent", url], + queryFn: () => fetchDocumentContent(url), + enabled: !!url, + cacheTime: Infinity, + staleTime: Infinity, + refetchOnWindowFocus: false, + }); + + return { + documentContent: data, + isLoading, + isError, + error, + }; +}; diff --git a/src/hooks/useSourceDocuments.ts b/src/hooks/useSourceDocuments.ts new file mode 100644 index 0000000..0500a95 --- /dev/null +++ b/src/hooks/useSourceDocuments.ts @@ -0,0 +1,47 @@ +import { useQuery } from "@tanstack/react-query"; + +interface Document { + title: string; + url: string; + indexed_at: string; +} + +interface SourceDocumentsResponse { + documents: Document[]; + total: number; +} +/** POSTs domain and page as JSON to /api/elasticSearchProxy/sourceDocuments and returns data.data. Throws an Error carrying data.message when the parsed body does not report success. NOTE(review): the HTTP status is not checked before response.json() is called. */ +const fetchSourceDocuments = async ( + domain: string, + page: number +): Promise => { + const response = await fetch("/api/elasticSearchProxy/sourceDocuments", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ domain, page }), + }); + const data = await response.json(); + if (!data.success) throw new Error(data.message); + return data.data; +}; +/** React Query wrapper around fetchSourceDocuments, keyed by ["sourceDocuments", domain, page]. Returns the documents and total of the fetched page (both undefined until data is available) together with the loading/error state. */ +export const useSourceDocuments = (domain: string, 
page: number) => { + const { data, isLoading, isError, error } = useQuery< + SourceDocumentsResponse, + Error + >({ + queryKey: ["sourceDocuments", domain, page], + queryFn: () => fetchSourceDocuments(domain, page), + cacheTime: Infinity, + staleTime: Infinity, + refetchOnWindowFocus: false, + }); + + return { + sourceDocuments: data?.documents, + total: data?.total, + isLoading, + isError, + error, + }; +}; diff --git a/src/hooks/useSources.ts b/src/hooks/useSources.ts new file mode 100644 index 0000000..47a7ef5 --- /dev/null +++ b/src/hooks/useSources.ts @@ -0,0 +1,45 @@ +import { useQuery } from "@tanstack/react-query"; +import { EsSourcesResponse } from "@/types"; + +type FetchSources = (url?: string) => Promise; +/** POSTs an empty JSON object to url (default /api/elasticSearchProxy/sources) and resolves with data.data?.result. A body that does not report success throws with data.message or a fallback text; the trailing catch rewraps any failure, including that one, in a new Error built from err.message. */ +const fetchSources: FetchSources = async (url) => { + return fetch(url ?? "/api/elasticSearchProxy/sources", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({}), + }) + .then(async (res) => { + const data = await res.json(); + if (!data.success) { + const errMessage = data.message || "Error while fetching sources"; + throw new Error(errMessage); + } + return data.data?.result; + }) + .catch((err) => { + throw new Error(err.message ?? 
"Error fetching sources"); + }); +}; +/** React Query wrapper around fetchSources under the ["sources"] key. cacheTime and staleTime are Infinity and refetch on window focus is off, so the list is fetched once and then served from cache. */ +export const useSources = () => { + const { data, isLoading, isError, error } = useQuery< + EsSourcesResponse, + Error + >({ + queryKey: ["sources"], + queryFn: () => fetchSources(), + cacheTime: Infinity, + staleTime: Infinity, + refetchOnWindowFocus: false, + }); + + return { + sources: data, + isLoading, + isError, + error, + }; +}; diff --git a/src/pages/api/elasticSearchProxy/getDocumentContent.ts b/src/pages/api/elasticSearchProxy/getDocumentContent.ts new file mode 100644 index 0000000..4e63c79 --- /dev/null +++ b/src/pages/api/elasticSearchProxy/getDocumentContent.ts @@ -0,0 +1,55 @@ +import type { NextApiRequest, NextApiResponse } from "next"; +import { client } from "@/config/elasticsearch"; +/** API route that looks up one document by an exact match on url.keyword. Responds 405 to non-POST requests, 400 when url is missing, 404 when the search has no hits, and 200 with the first hit's _source otherwise. NOTE(review): errors thrown by the search are also answered with 400, and the 405/400 validation bodies carry an error field rather than the success/message pair the client hooks read - confirm both are intended. */ +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ + error: + "Invalid request method. This endpoint only supports POST requests.", + }); + } + + const { url } = req.body; + + if (!url) { + return res.status(400).json({ + error: "URL is required", + }); + } + + try { + const result = await client.search({ + index: process.env.INDEX, + body: { + query: { + term: { "url.keyword": url }, + }, + size: 1, + }, + }); + + if (result.hits.hits.length === 0) { + return res.status(404).json({ + success: false, + message: "Document not found", + }); + } + + const document = result.hits.hits[0]._source; + + return res.status(200).json({ + success: true, + data: document, + }); + } catch (error) { + console.error(error); + return res.status(400).json({ + success: false, + message: + error.message || "An error occurred while fetching document content", + }); + } +} diff --git a/src/pages/api/elasticSearchProxy/sourceDocuments.ts b/src/pages/api/elasticSearchProxy/sourceDocuments.ts new file mode 100644 index 0000000..3590b59 --- /dev/null +++ b/src/pages/api/elasticSearchProxy/sourceDocuments.ts @@ -0,0 +1,63 @@ +import type { 
NextApiRequest, NextApiResponse } from "next"; +import { client } from "@/config/elasticsearch"; +/** API route that returns one page of documents for a domain. Responds 405 to non-POST requests and 400 when domain is missing; otherwise runs a term query on domain.keyword with a fixed page size of 10, sorted by indexed_at descending, and returns title, url and indexed_at of each hit plus the total hit count. NOTE(review): page is read from the request body without validation, so the computed from offset can be negative or NaN; search errors are answered with 400. */ +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ + error: + "Invalid request method. This endpoint only supports POST requests.", + }); + } + + const { domain, page = 1 } = req.body; + + if (!domain) { + return res.status(400).json({ + error: "Domain is required", + }); + } + + const size = 10; + const from = (page - 1) * size; + + try { + const result = await client.search({ + index: process.env.INDEX, + body: { + from, + size, + query: { + term: { "domain.keyword": domain }, + }, + _source: ["title", "url", "indexed_at"], + sort: [{ indexed_at: "desc" }], + }, + }); + + const documents = result.hits.hits.map((hit) => hit._source); + + // Handle both possible types of total + const total = + typeof result.hits.total === "number" + ? result.hits.total + : result.hits.total.value; + + return res.status(200).json({ + success: true, + data: { + documents, + total, + }, + }); + } catch (error) { + console.error(error); + return res.status(400).json({ + success: false, + message: + error.message || "An error occurred while fetching document details", + }); + } +} diff --git a/src/pages/api/elasticSearchProxy/sources.ts b/src/pages/api/elasticSearchProxy/sources.ts new file mode 100644 index 0000000..8c5ff7b --- /dev/null +++ b/src/pages/api/elasticSearchProxy/sources.ts @@ -0,0 +1,71 @@ +import type { NextApiRequest, NextApiResponse } from "next"; +import { client } from "@/config/elasticsearch"; +/** Shape of one bucket of the domains terms aggregation: the domain key, its document count, and the nested last_indexed max value. */ +interface DomainAggregationBucket { + key: string; + doc_count: number; + last_indexed: { + value: number; + }; +} +/** API route that lists the indexed domains. Responds 405 to non-POST requests; otherwise runs a terms aggregation on domain.keyword (at most 1000 buckets) with a max sub-aggregation on indexed_at and maps each bucket to domain, documentCount and lastScraped, where a falsy last_indexed value becomes null. NOTE(review): search errors are answered with 400. */ +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ + error: + "Invalid request method. 
This endpoint only supports POST requests.", + }); + } + + try { + const result = await client.search({ + index: process.env.INDEX, + body: { + size: 0, + aggs: { + domains: { + terms: { + field: "domain.keyword", + size: 1000, // Adjust based on the expected number of unique domains + }, + aggs: { + last_indexed: { + max: { + field: "indexed_at", + }, + }, + }, + }, + }, + }, + }); + + const domainBuckets = ( + result.aggregations?.domains as { + buckets: DomainAggregationBucket[]; + } + ).buckets; + + const sources = domainBuckets.map((bucket) => ({ + domain: bucket.key, + documentCount: bucket.doc_count, + lastScraped: bucket.last_indexed.value || null, + })); + + return res.status(200).json({ + success: true, + data: { + result: sources, + }, + }); + } catch (error) { + console.error(error); + return res.status(400).json({ + success: false, + message: error.message || "An error occurred while fetching sources data", + }); + } +} diff --git a/src/pages/sources.tsx b/src/pages/sources.tsx new file mode 100644 index 0000000..1861d4c --- /dev/null +++ b/src/pages/sources.tsx @@ -0,0 +1,154 @@ +import React, { useState, useMemo } from "react"; + +import NavBar from "@/components/navBar/NavBar"; +import Footer from "@/components/footer/Footer"; +import Documents from "@/components/sources/Documents"; +import { useSources } from "@/hooks/useSources"; +import { formatTimeAgo } from "@/utils/dateUtils"; +import { Source } from "@/types"; +/** Page listing the data sources returned by useSources. Holds the current sort configuration (null until a column is chosen) and the domain of the single expanded row (null when none is expanded). */ +const SourcesPage: React.FC = () => { + const { sources, isLoading, isError, error } = useSources(); + const [sortConfig, setSortConfig] = useState<{ + key: keyof Source; + direction: "ascending" | "descending"; + } | null>(null); + const [expandedSource, setExpandedSource] = useState(null); +/* Expands the given domain, or collapses it when it is already the expanded one. */ + const toggleExpand = (domain: string) => { + setExpandedSource(expandedSource === domain ? 
null : domain); + }; +/* Sorted copy of sources, recomputed when sources or sortConfig change: an empty array while sources is unset, the original order while no sortConfig is chosen, otherwise ordered by the chosen key using the less-than / greater-than operators. */ + const sortedSources = useMemo(() => { + if (!sources) return []; + const sortableItems = [...sources]; + if (sortConfig !== null) { + sortableItems.sort((a, b) => { + if (a[sortConfig.key] < b[sortConfig.key]) { + return sortConfig.direction === "ascending" ? -1 : 1; + } + if (a[sortConfig.key] > b[sortConfig.key]) { + return sortConfig.direction === "ascending" ? 1 : -1; + } + return 0; + }); + } + return sortableItems; + }, [sources, sortConfig]); +/* Chooses the sort column: picking the column that is already sorted ascending flips it to descending; every other case sorts ascending. */ + const sortBy = (key: keyof Source) => { + let direction: "ascending" | "descending" = "ascending"; + if ( + sortConfig && + sortConfig.key === key && + sortConfig.direction === "ascending" + ) { + direction = "descending"; + } + setSortConfig({ key, direction }); + }; +/* Arrow suffix for the header of the currently sorted column; an empty string for every other column. */ + const getSortIndicator = (key: keyof Source) => { + if (sortConfig && sortConfig.key === key) { + return sortConfig.direction === "ascending" ? " ▲" : " ▼"; + } + return ""; + }; + + return ( +
+ +
+

Data Sources

+ {isLoading ? ( +
Loading...
+ ) : isError ? ( +
Error: {error.message}
+ ) : ( +
+ + + + + + + + + + + {sortedSources.map((source, index) => ( + + toggleExpand(source.domain)} + > + + + + + + {expandedSource === source.domain && ( + + + + )} + + ))} + +
sortBy("domain")} + > + Domain{getSortIndicator("domain")} + sortBy("lastScraped")} + > + Last Scraped{getSortIndicator("lastScraped")} + sortBy("documentCount")} + > + Document Count{getSortIndicator("documentCount")} +
+ + {expandedSource === source.domain ? "▼" : "▶"} + + + e.stopPropagation()} + > + {source.domain} + + +
+ {formatTimeAgo(source.lastScraped)} + + {new Date(source.lastScraped).toLocaleString()} + +
+
+ {source.documentCount} +
+ +
+
+ )} +
+
+
+ ); +}; + +export default SourcesPage; diff --git a/src/types.ts b/src/types.ts index 943af5f..83496af 100644 --- a/src/types.ts +++ b/src/types.ts @@ -60,3 +60,11 @@ export type EsSearchResponse = SearchResponse< unknown, Record >; +/** One entry of the sources listing: the domain, when it was last scraped, and how many documents it holds. NOTE(review): lastScraped is typed as string here, while the sources API handler fills it from a numeric aggregation value or null - confirm which type is intended. */ +export interface Source { + domain: string; + lastScraped: string; + documentCount: number; +} + +export type EsSourcesResponse = Source[]; diff --git a/src/utils/dateUtils.ts b/src/utils/dateUtils.ts new file mode 100644 index 0000000..4aa5b0e --- /dev/null +++ b/src/utils/dateUtils.ts @@ -0,0 +1,18 @@ +export const formatTimeAgo = (date: string | number) => {/* Describes how long ago date was, relative to the current time: hours when under a day, then "Yesterday", days, weeks, months (30-day units) and years (365-day units), each rounded down. NOTE(review): the difference is taken with Math.abs, so a future date reads as if it were in the past; unit counts of one render as "1 weeks ago", "1 months ago" and "1 years ago"; a date that cannot be parsed falls through every comparison and yields "NaN years ago". */ + const now = new Date(); + const past = new Date(date); + const diffTime = Math.abs(now.getTime() - past.getTime()); + const diffHours = Math.floor(diffTime / (1000 * 60 * 60)); + const diffDays = Math.floor(diffTime / (1000 * 60 * 60 * 24)); + + if (diffHours < 24) { + if (diffHours === 0) return "Less than an hour ago"; + if (diffHours === 1) return "1 hour ago"; + return `${diffHours} hours ago`; + } + if (diffDays === 1) return "Yesterday"; + if (diffDays < 7) return `${diffDays} days ago`; + if (diffDays < 30) return `${Math.floor(diffDays / 7)} weeks ago`; + if (diffDays < 365) return `${Math.floor(diffDays / 30)} months ago`; + return `${Math.floor(diffDays / 365)} years ago`; +};