diff --git a/clamscan/clamscan.c b/clamscan/clamscan.c
index 2b10a1023b..3c35d6aa0c 100644
--- a/clamscan/clamscan.c
+++ b/clamscan/clamscan.c
@@ -295,6 +295,7 @@ void help(void)
     mprintf(LOGG_INFO, "    --phishing-sigs[=yes(*)/no]          Enable email signature-based phishing detection\n");
     mprintf(LOGG_INFO, "    --phishing-scan-urls[=yes(*)/no]     Enable URL signature-based phishing detection\n");
     mprintf(LOGG_INFO, "    --heuristic-alerts[=yes(*)/no]       Heuristic alerts\n");
+    mprintf(LOGG_INFO, "    --store-html-urls[=yes(*)/no]        Store html URLs in metadata\n");
     mprintf(LOGG_INFO, "    --heuristic-scan-precedence[=yes/no(*)] Stop scanning as soon as a heuristic match is found\n");
     mprintf(LOGG_INFO, "    --normalize[=yes(*)/no]              Normalize html, script, and text files. Use normalize=no for yara compatibility\n");
     mprintf(LOGG_INFO, "    --scan-pe[=yes(*)/no]                Scan PE files\n");
diff --git a/clamscan/manager.c b/clamscan/manager.c
index db3a8f46b6..8c75e75010 100644
--- a/clamscan/manager.c
+++ b/clamscan/manager.c
@@ -1557,6 +1557,10 @@ int scanmanager(const struct optstruct *opts)
         options.general |= CL_SCAN_GENERAL_HEURISTICS;
     }
 
+    if (optget(opts, "store-html-urls")->enabled) {
+        options.general |= CL_SCAN_STORE_HTML_URLS;
+    }
+
     /* TODO: Remove deprecated option in a future feature release */
     if ((optget(opts, "block-max")->enabled) ||
         (optget(opts, "alert-exceeds-max")->enabled)) {
diff --git a/common/optparser.c b/common/optparser.c
index dd99f43eb2..8caf869c99 100644
--- a/common/optparser.c
+++ b/common/optparser.c
@@ -389,6 +389,7 @@ const struct clam_option __clam_options[] = {
     {"PhishingScanURLs", "phishing-scan-urls", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Scan URLs found in mails for phishing attempts using heuristics.", "yes"},
 
     {"HeuristicAlerts", "heuristic-alerts", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "In some cases (eg. complex malware, exploits in graphic files, and others),\nClamAV uses special algorithms to provide accurate detection. This option\ncontrols the algorithmic detection.", "yes"},
+    {"StoreHTMLUrls", "store-html-urls", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Store URLs found in HTML <form and <a tags.", "yes"},
 
     {"HeuristicScanPrecedence", "heuristic-scan-precedence", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Allow heuristic match to take precedence.\nWhen enabled, if a heuristic scan (such as phishingScan) detects\na possible virus/phish it will stop scan immediately. Recommended, saves CPU\nscan-time.\nWhen disabled, virus/phish detected by heuristic scans will be reported only\nat the end of a scan. If an archive contains both a heuristically detected\nvirus/phish, and a real malware, the real malware will be reported.\nKeep this disabled if you intend to handle \"Heuristics.*\" viruses\ndifferently from \"real\" malware.\nIf a non-heuristically-detected virus (signature-based) is found first,\nthe scan is interrupted immediately, regardless of this config option.", "yes"},
 
diff --git a/libclamav/clamav.h b/libclamav/clamav.h
index 6e12699266..a8f84055b4 100644
--- a/libclamav/clamav.h
+++ b/libclamav/clamav.h
@@ -198,6 +198,7 @@ struct cl_scan_options {
 #define CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED   0x800  /* alert when detecting stripped social security numbers */
 #define CL_SCAN_HEURISTIC_STRUCTURED_CC             0x1000 /* alert when detecting credit card numbers */
 #define CL_SCAN_HEURISTIC_BROKEN_MEDIA              0x2000 /* alert if a file does not match the identified file format, works with JPEG, TIFF, GIF, PNG */
+#define CL_SCAN_STORE_HTML_URLS                     0x4000 /* store URLs found in HTML <a> and <form> tags; tested against the `general` options field, not `heuristic` */
 
 /* mail scanning options */
 #define CL_SCAN_MAIL_PARTIAL_MESSAGE                0x1
diff --git a/libclamav/htmlnorm.c b/libclamav/htmlnorm.c
index edd1bc00d7..a090e731f6 100644
--- a/libclamav/htmlnorm.c
+++ b/libclamav/htmlnorm.c
@@ -490,6 +490,7 @@ static inline void html_tag_contents_append(struct tag_contents *cont, const uns
     uint32_t mbchar = 0;
     if (!begin || !end)
         return;
+
     for (i = cont->pos; i < MAX_TAG_CONTENTS_LENGTH && (begin < end); i++) {
         uint8_t c = *begin++;
         if (mbchar && (c < 0x80 || mbchar >= 0x10000)) {
@@ -687,6 +688,7 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
     uint32_t mbchar  = 0;
     uint32_t mbchar2 = 0;
 
+
     /*
      * Initialize stack buffers.
      */
@@ -1929,6 +1931,7 @@ static bool cli_html_normalise(cli_ctx *ctx, int fd, m_area_t *m_area, const cha
         cli_js_destroy(js_state);
         js_state = NULL;
     }
+
     html_tag_arg_free(&tag_args);
     if (!m_area) {
         fclose(stream_in);
diff --git a/libclamav/others.h b/libclamav/others.h
index 8cebf78d35..4ffb7d0a50 100644
--- a/libclamav/others.h
+++ b/libclamav/others.h
@@ -552,6 +552,7 @@ extern LIBCLAMAV_EXPORT int have_rar;
 #define SCAN_HEURISTICS (ctx->options->general & CL_SCAN_GENERAL_HEURISTICS)
 #define SCAN_HEURISTIC_PRECEDENCE (ctx->options->general & CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE)
 #define SCAN_UNPRIVILEGED (ctx->options->general & CL_SCAN_GENERAL_UNPRIVILEGED)
+#define STORE_HTML_URLS   (ctx->options->general & CL_SCAN_STORE_HTML_URLS)
 
 #define SCAN_PARSE_ARCHIVE (ctx->options->parse & CL_SCAN_PARSE_ARCHIVE)
 #define SCAN_PARSE_ELF (ctx->options->parse & CL_SCAN_PARSE_ELF)
diff --git a/libclamav/scanners.c b/libclamav/scanners.c
index 8cc19297af..d9a577f02b 100644
--- a/libclamav/scanners.c
+++ b/libclamav/scanners.c
@@ -2082,6 +2082,69 @@ static cl_error_t cli_ole2_tempdir_scan_for_xlm_and_images(const char *dir, cli_
     return ret;
 }
 
+/* Key of the JSON array in the scan metadata under which the URLs found in
+ * HTML documents are stored. File-local: no header declares it. */
+static const char *const HTML_URLS_JSON_KEY = "HTMLUrls";
+
+/**
+ * @brief Check whether a string starts with one of the accepted URL schemes.
+ *
+ * Matching is case-insensitive and needs at least one character after the
+ * scheme, so a bare "http://" is rejected.
+ * @param str  NUL-terminated string to test; may be NULL.
+ * @return true if str begins with "https://", "http://" or "ftp://".
+ */
+static bool is_url(const char *const str)
+{
+    static const char *const schemes[] = {"https://", "http://", "ftp://"};
+    const size_t len = (NULL != str) ? strlen(str) : 0;
+    for (size_t i = 0; i < sizeof(schemes) / sizeof(schemes[0]); i++) {
+        const size_t scheme_len = strlen(schemes[i]);
+        if ((len > scheme_len) && (0 == strncasecmp(str, schemes[i], scheme_len))) {
+            return true;
+        }
+    }
+    return false;
+}
+/**
+ * @brief Record the URLs collected from an HTML document in the scan metadata.
+ *
+ * Each hrefs->value entry accepted by is_url() is appended to the JSON array
+ * stored under HTML_URLS_JSON_KEY in ctx->wrkproperty. The array is requested
+ * only when a first URL is seen. Nothing happens unless STORE_HTML_URLS and
+ * SCAN_COLLECT_METADATA are set and ctx->wrkproperty is ctx->properties.
+ *
+ * @param ctx    Scan context.
+ * @param hrefs  Tag arguments to read the URLs from; may be NULL.
+ */
+static void save_urls(cli_ctx *ctx, tag_arguments_t *hrefs)
+{
+    json_object *url_array = NULL;
+    int idx;
+
+    if ((NULL == hrefs) || (ctx->wrkproperty != ctx->properties)) {
+        return;
+    }
+    if (!(STORE_HTML_URLS) || !(SCAN_COLLECT_METADATA) || (NULL == ctx->wrkproperty)) {
+        return;
+    }
+
+    for (idx = 0; idx < hrefs->count; idx++) {
+        const char *candidate = (const char *)hrefs->value[idx];
+        if (!is_url(candidate)) {
+            continue;
+        }
+        if (NULL == url_array) {
+            url_array = cli_jsonarray(ctx->wrkproperty, HTML_URLS_JSON_KEY);
+            if (NULL == url_array) {
+                cli_dbgmsg("[cli_scanhtml] Failed to add \"%s\" entry JSON array\n", HTML_URLS_JSON_KEY);
+                return;
+            }
+        }
+        cli_jsonstr(url_array, NULL, candidate);
+    }
+}
+
 static cl_error_t cli_scanhtml(cli_ctx *ctx)
 {
     cl_error_t status = CL_SUCCESS;
@@ -2113,7 +2176,15 @@ static cl_error_t cli_scanhtml(cli_ctx *ctx)
 
     cli_dbgmsg("cli_scanhtml: using tempdir %s\n", tempname);
 
-    (void)html_normalise_map(ctx, map, tempname, NULL, ctx->dconf);
+    /* Collect link URLs for the JSON metadata; hrefs is owned here and must be freed. */
+    if (STORE_HTML_URLS && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
+        tag_arguments_t hrefs = {.scanContents = 1};
+        (void)html_normalise_map(ctx, map, tempname, &hrefs, ctx->dconf);
+        save_urls(ctx, &hrefs);
+        html_tag_arg_free(&hrefs);
+    } else {
+        (void)html_normalise_map(ctx, map, tempname, NULL, ctx->dconf);
+    }
 
     snprintf(fullname, 1024, "%s" PATHSEP "nocomment.html", tempname);
     fd = open(fullname, O_RDONLY | O_BINARY);