更新数据原创

white0dew · Oct 11, 2024 · 470a0a4 · 470a0a4
1 parent 735a754
commit 470a0a4
Show file tree

Hide file tree

Showing 9 changed files with 403 additions and 7 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -67,4 +67,5 @@ ENV PORT 3020
 # server.js is created by next build from the standalone output
 # https://nextjs.org/docs/pages/api-reference/next-config-js/output
 # 启动服务
-CMD ["node", "server.js"]
+CMD ["node", "server.js"]
+# CMD ["sh", "-c", "node .js && node server.js"]
diff --git a/elog-doge-upload.js b/elog-doge-upload.js
@@ -5,7 +5,6 @@ const {
 } = require("@aws-sdk/client-s3");
 // 节省体积，只引入 S3 服务（推荐）
 const S3 = require("aws-sdk/clients/s3");
-// const dogecloudApi = require("./lib/doge-sdk"); // 请替换为正确的dogecloudApi函数路径
 var axios = require("axios");
 var crypto = require("crypto");
 var querystring = require("querystring");

diff --git a/next.config.js b/next.config.js
@@ -1,7 +1,10 @@
 const { withContentlayer } = require("next-contentlayer2");
-
+const NextOSSPlugin = require("next-oss-webpack-plugin");
+const isProd = process.env.NODE_ENV === "production";
 /** @type {import('next').NextConfig} */
 const nextConfig = {
+  // 配置 CDN 地址
+  // assetPrefix: isProd ? "https://cdn.example.com/offernow/" : "",
   swcMinify: true,
   output: "standalone",
   pageExtensions: ["ts", "tsx", "js", "jsx", "md", "mdx"],
@@ -36,11 +39,29 @@ const nextConfig = {
       },
     ],
   },
-  webpack: (config) => {
+  webpack: (config, { buildId }) => {
     config.module.rules.push({
       test: /\.svg$/,
       use: ["@svgr/webpack"],
     });
+
+    // if (isProd && buildId) {
+    //   config.plugins.push(
+    //     new NextOSSPlugin({
+    //       region: "oss-cn-hangzhou", // bucket所在区域
+    //       accessKeyId: process.env.R2_ACCESSKEYI,
+    //       accessKeySecret: process.env.R2_SECRET_ACCESSKEY,
+    //       bucket: process.env.R2_BUCKET,
+
+    //       filter: (assert) => /\.js$/.test(assert),
+    //       assetPrefix: `${assetPrefix}/_next/`, // 上传资源前缀
+    //       customizedOssPaths: [
+    //         // 替换为 /:buildId/page/xxx.js ，使能正常访问
+    //         { pattern: /bundles\/pages/g, replace: `${buildId}/page` },
+    //       ],
+    //     })
+    //   );
+    // }
     return config;
   },
 };

diff --git a/package.json b/package.json
@@ -36,15 +36,18 @@
     "cross-env": "^7.0.3",
     "crypto": "^1.0.1",
     "dayjs": "^1.11.11",
+    "dotenv": "^16.4.5",
     "embla-carousel-react": "^8.0.4",
     "esbuild": "^0.21.1",
     "flexsearch": "^0.7.43",
     "github-slugger": "^2.0.0",
     "hast-util-from-html-isomorphic": "^2.0.0",
     "lucide-react": "^0.378.0",
     "mdx": "^0.3.1",
+    "mime-types": "^2.1.35",
     "next": "14.2.8",
     "next-contentlayer2": "0.4.6",
+    "next-oss-webpack-plugin": "^1.0.0",
     "next-themes": "^0.3.0",
     "pliny": "^0.2.1",
     "react": "^18.3.1",
@@ -53,6 +56,7 @@
     "react-icons": "^4.12.0",
     "react-markdown": "^9.0.1",
     "react-syntax-highlighter": "^15.5.0",
+    "recursive-readdir": "^2.2.3",
     "rehype-autolink-headings": "^7.1.0",
     "rehype-citation": "^2.0.0",
     "rehype-katex": "^7.0.0",

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/public/sitemap.xml b/public/sitemap.xml
@@ -976,7 +976,7 @@
     <loc>https://offernow.cn/s/language/vue3/ebxce1ws74y0t8tq</loc>
     <changefreq>weekly</changefreq>
     <priority>0.5</priority>
-    <lastmod>2024-09-30T07:10:17.986Z</lastmod>
+    <lastmod>2024-09-30T12:51:31.440Z</lastmod>
   </url>
   <url>
     <loc>https://offernow.cn/s/language/vue3/ecpzrwpgo9ugf2bb</loc>
@@ -1774,7 +1774,7 @@
     <loc>https://offernow.cn/s/cs_base/os/hand_os/vpcarog0vdzlbgx3</loc>
     <changefreq>weekly</changefreq>
     <priority>0.5</priority>
-    <lastmod>2024-09-30T07:10:17.988Z</lastmod>
+    <lastmod>2024-09-30T12:51:31.443Z</lastmod>
   </url>
   <url>
     <loc>https://offernow.cn/s/cs_base/os/hand_os/ho88cbpxgbc2gpnf</loc>
@@ -2710,7 +2710,7 @@
     <loc>https://offernow.cn/s/interview/chat_view/20244/lgawhfx8pq2slyko</loc>
     <changefreq>weekly</changefreq>
     <priority>0.5</priority>
-    <lastmod>2024-09-30T07:10:17.991Z</lastmod>
+    <lastmod>2024-09-30T12:51:31.446Z</lastmod>
   </url>
   <url>
     <loc>https://offernow.cn/s/interview/chat_view/20244/iv993phgec0ybt41</loc>

diff --git a/script/auto-flush.py b/script/auto-flush.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+
+# 自动刷新网站
+# 可以使用conjob 定时运行python代码，或者使用宝塔面板进行处理
+
+
+import requests
+
+# 定义 API 的 URL
+url = "https://serverless-api-elog.vercel.app/api/github"
+
+# 定义查询参数
+params = {
+    "user": "**",
+    "repo": "**",
+    "event_type": "sync",
+    "token": "**"
+}
+
+
+
+# 发送 GET 请求
+response = requests.get(url, params=params)
+
+# 检查请求是否成功
+if response.status_code == 200:
+    # 打印响应内容
+    print("Response JSON:")
+    print(response.json())
+else:
+    # 打印错误信息
+    print(f"Request failed with status code {response.status_code}")
+    print("Response text:")
+    print(response.text)
diff --git a/script/baidu-seo-flush.py b/script/baidu-seo-flush.py
@@ -0,0 +1,108 @@
+import random
+import requests
+from bs4 import BeautifulSoup
+import time
+
+# Assumes the sitemap is an XML file whose URLs are inside <loc> tags
+sitemap_url = 'https://offernow.cn/sitemap.xml'
+
+# File that records the URLs that have already been processed
+processed_urls_file = 'processed_urls.txt'
+
+def get_urls_from_sitemap(sitemap_url):
+    """从 sitemap 中提取所有的 URL"""
+    response = requests.get(sitemap_url)
+    soup = BeautifulSoup(response.content, 'xml')
+    urls = [loc.text for loc in soup.find_all('loc')]
+    return urls
+
+def submit_urls_to_baidu(urls, token):
+    """
+    提交URL数组到百度的普通收录工具
+
+    参数:
+    urls (list): 要提交的URL列表
+    site (str): 在搜索资源平台验证的站点
+    token (str): 在搜索资源平台申请的推送用的准入密钥
+
+    返回:
+    dict: 返回百度接口的响应
+    """
+    api_url = f"http://data.zz.baidu.com/urls?site=https://offernow.cn&token={token}"
+    headers = {
+        'Content-Type': 'text/plain'
+    }
+    data = "\n".join(urls)
+
+    response = requests.post(api_url, headers=headers, data=data)
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        print(f"Error: {response.status_code}")
+        return None
+
+def load_processed_urls():
+    """从文件中加载已处理的URL列表"""
+    try:
+        with open(processed_urls_file, 'r') as f:
+            return set(f.read().splitlines())
+    except FileNotFoundError:
+        return set()
+
+def save_processed_url(url):
+    print("保存已处理的URL")
+    """将已处理的URL保存到文件中"""
+    with open(processed_urls_file, 'a') as f:
+        f.write(url + '\n')
+    print(f"Saved URL: {url}")  # 调试信息
+
+def clear_processed_urls():
+    """清空已处理的URL文件"""
+    with open(processed_urls_file, 'w') as f:
+        f.write('')
+
+# Example usage
+# NOTE(review): this push token is hard-coded in source control — consider
+# loading it from an environment variable and rotating it; confirm with owner.
+token = "M0sfIGdkUod4leN9"
+
+def main():
+    urls = get_urls_from_sitemap(sitemap_url)
+    random.shuffle(urls)  # 随机排序URL列表
+    processed_urls = load_processed_urls()
+    num = 0
+
+    for url in urls:
+        if url in processed_urls:
+            print(f"URL {url} 已处理过，跳过...")
+            continue  # 跳过已处理的URL
+
+        num += 1
+        curUrl = {url}
+        # 这里你可以按照你的需要处理每个页面的内容
+        start_time = time.time()  # 请求前时间
+
+        response = submit_urls_to_baidu(curUrl, token)
+
+        elapsed_time = time.time() - start_time  # 请求后时间
+        print(f"第{num}次，内容 {url}，耗时 {elapsed_time:.2f} 秒:")
+
+        if response:
+            print(f"推送成功的URL条数: {response.get('success')}")
+            print(f"当天剩余可推送的URL条数: {response.get('remain')}")
+            print(f"不是本站的URL: {response.get('not_same_site')}")
+            print(f"不合法的URL: {response.get('not_valid')}")
+
+        save_processed_url(url)  # 保存已处理的URL
+
+        if num >= 10:
+            break
+
+    # 检查是否所有URL都已处理完毕, 清空已处理的URL文件
+    # 通过比较两个集合的差异来判断是否所有URL都已处理完毕
+    # urls所有都在processed_urls中，才清空
+    if not set(urls) - processed_urls:
+        clear_processed_urls()
+        print("所有URL都已处理完毕，已清空已处理的URL文件。")
+
+if __name__ == '__main__':
+    main()