-
Notifications
You must be signed in to change notification settings - Fork 0
/
summarizeTos.js
103 lines (84 loc) · 3.25 KB
/
summarizeTos.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Assuming you're using Compromise.js
import nlp from './node_modules/compromise/builds/compromise.min.js';
import { log, logLevels } from './debugger.js';
/**
 * Summarizes the ToS text by breaking it into sections and summarizing each
 * section individually.
 *
 * Sections come from `identifySections`, each section's text is condensed by
 * `summarizeSection`, and the per-section results are merged by
 * `combineSummaries`. A section that fails to summarize contributes an empty
 * string so one bad section does not abort the whole run.
 *
 * @param {CheerioStatic} $ The Cheerio object representing the loaded ToS HTML.
 * @return {string} The summarized ToS text, or a fixed error message when
 *     summarization fails as a whole.
 */
function summarizeTos($) {
  try {
    console.log("Starting ToS summarization...");
    const sections = identifySections($);
    console.log("Identified sections:", sections);

    const sectionSummaries = sections.map((section) => {
      try {
        // Announce the step before doing it so the log order matches the work.
        console.log("Extracting text from section:", section.heading);
        const sectionText = extractSectionText(section);
        const summary = summarizeSection(sectionText);
        console.log("Section summary:", summary);
        return summary;
      } catch (error) {
        console.error("Error summarizing section:", section.heading, error);
        return ""; // Keep the array aligned with `sections` even on failure
      }
    });

    // The combiner labels each summary with its section heading, and
    // `sections` is local to this function, so it has to be handed over
    // explicitly alongside the summaries.
    const overallSummary = combineSummaries(sectionSummaries, sections);
    console.log("Overall summary:", overallSummary);
    return overallSummary;
  } catch (error) {
    console.error("Error in summarizeTos:", error);
    // Callers always get a string back, never an exception.
    return "An error occurred while summarizing the Terms of Service.";
  }
}
/**
 * Splits the loaded document into sections keyed by its heading elements.
 *
 * Every h1-h6 element starts a section; the section's content is the text of
 * the siblings that follow the heading up to the next heading element.
 *
 * @param {CheerioStatic} $ The Cheerio object for the loaded document.
 * @return {Array<{heading: string, content: string}>} One entry per heading,
 *     or an empty array if anything goes wrong while querying.
 */
function identifySections($) {
  const headingSelector = 'h1, h2, h3, h4, h5, h6';

  try {
    log(logLevels.DEBUG, "Identifying sections using Cheerio");

    // Turns one heading element into a { heading, content } record.
    const toSection = (_index, headingEl) => {
      const $heading = $(headingEl);
      const heading = $heading.text();
      const content = $heading.nextUntil(headingSelector).text();
      return { heading, content };
    };

    // `.get()` unwraps the Cheerio collection into a plain array.
    const found = $(headingSelector).map(toSection).get();

    log(logLevels.INFO, `Identified sections: ${JSON.stringify(found)}`);
    return found;
  } catch (err) {
    log(logLevels.ERROR, `Error identifying sections: ${err.message}`);
    return []; // Best effort: callers treat "no sections" as a valid result
  }
}
/**
 * Returns the text carried by a section record.
 *
 * @param {{content: string}} section A section record with a `content`
 *     property, such as the ones built by `identifySections`.
 * @return {string} The section's `content` value, or an empty string when
 *     reading it throws (for example when `section` is null or undefined).
 */
function extractSectionText(section) {
  let text;
  try {
    text = section.content;
  } catch (err) {
    console.error("Error extracting section text:", err);
    text = ""; // Fall back to empty text rather than propagating the error
  }
  return text;
}
/**
 * Produces a basic extractive summary of one section.
 *
 * The text is parsed with `nlp`, and the text of the first three entries of
 * `sentences()` is returned. This is a deliberately simple strategy; a more
 * advanced summarization technique could replace it here.
 *
 * @param {string} sectionText The raw text of a section.
 * @return {string} The leading sentences of the section, or an empty string
 *     if parsing or slicing throws.
 */
function summarizeSection(sectionText) {
  let leadingText;
  try {
    const sentences = nlp(sectionText).sentences();
    leadingText = sentences.slice(0, 3).text();
  } catch (err) {
    console.error("Error summarizing section text:", err);
    leadingText = ""; // An unsummarizable section yields an empty summary
  }
  return leadingText;
}
/**
 * Merges per-section summaries into a single Markdown-style string.
 *
 * Each summary is emitted as `## <heading>\n<summary>\n\n`, where the heading
 * is taken from the entry of `sections` at the same index. When no heading is
 * available for an index (no `sections` given, a shorter array, or an empty
 * heading), the summary is emitted without a heading line.
 *
 * @param {string[]} sectionSummaries Summaries in section order.
 * @param {Array<{heading: string}>} [sections=[]] Section records aligned by
 *     index with `sectionSummaries`; only `heading` is read.
 * @return {string} The combined summary; an empty string for no summaries.
 */
function combineSummaries(sectionSummaries, sections = []) {
  log(logLevels.DEBUG, `Combining section summaries: ${JSON.stringify(sectionSummaries)}`);

  // Headings must be supplied by the caller: nothing named `sections` exists
  // in this function's scope otherwise. Tolerate null/non-array values too.
  const headingSources = Array.isArray(sections) ? sections : [];

  const overallSummary = sectionSummaries
    .map((summary, index) => {
      const section = headingSources[index];
      const heading = section ? section.heading : undefined;
      return heading ? `## ${heading}\n${summary}\n\n` : `${summary}\n\n`;
    })
    .join("");

  log(logLevels.INFO, `Overall summary: ${overallSummary}`);
  return overallSummary;
}
export { summarizeTos };