Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
yzqzss committed Aug 28, 2024
1 parent 36f2870 commit 8515dc5
Show file tree
Hide file tree
Showing 7 changed files with 458 additions and 0 deletions.
168 changes: 168 additions & 0 deletions altcrawlhq_server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package altcrawlhqserver

import (
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"time"

"git.archive.org/wb/gocrawlhq"
"github.com/gin-gonic/gin"
"github.com/gorilla/websocket"
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
)

// isAuthorized reports whether the request carries acceptable credentials.
//
// A request is accepted only when all of the following hold:
//   - the X-Identifier header is non-empty,
//   - the X-Auth-Key header equals the expected key,
//   - the X-Auth-Secret header equals the expected secret.
//
// NOTE(review): the credentials are hard-coded and compared with ==; moving
// them to configuration and using a constant-time comparison should be
// considered before this leaves WIP.
func isAuthorized(c *gin.Context) bool {
	const (
		expectedKey    = "saveweb_key"
		expectedSecret = "saveweb_sec"
	)

	// An identifier is mandatory even though its value is not inspected here.
	if c.GetHeader("X-Identifier") == "" {
		return false
	}

	// Empty key/secret headers can never equal the non-empty expected values,
	// so no separate emptiness check is required.
	return c.GetHeader("X-Auth-Key") == expectedKey &&
		c.GetHeader("X-Auth-Secret") == expectedSecret
}

// websocketHandler upgrades an authorized request to a WebSocket connection
// and then answers "identify" messages on it.
//
// Unauthorized requests receive a 401 JSON response and are not upgraded.
// After the upgrade, every incoming frame must be a text frame of the form
//
//	{"type":"identify","payload":{...}}
//
// whose payload is decoded into gocrawlhq.IdentifyMessage, logged, and
// answered with the JSON frame {"reply":"Echo..."}.
//
// Any failure caused by the peer (failed upgrade, read error or disconnect,
// non-text frame, malformed JSON, unexpected message type, write error) is
// logged and ends the handler, which closes the connection. The handler never
// panics on such input.
func websocketHandler(c *gin.Context) {
	if !isAuthorized(c) {
		c.JSON(http.StatusUnauthorized, gin.H{
			"error": "Unauthorized",
		})
		return
	}

	upGrader := websocket.Upgrader{
		// Every origin is accepted; access control relies on the auth headers.
		CheckOrigin: func(r *http.Request) bool {
			return true
		},
		ReadBufferSize:  1024,
		WriteBufferSize: 1024,
	}

	ws, err := upGrader.Upgrade(c.Writer, c.Request, nil)
	if err != nil {
		// Upgrade replies to the client with an HTTP error itself, so there
		// is nothing more to send here.
		fmt.Printf("websocket: upgrade failed: %v\n", err)
		return
	}

	defer func() {
		// Report the error returned by Close itself (not the outer err).
		if closeSocketErr := ws.Close(); closeSocketErr != nil {
			fmt.Printf("websocket: close failed: %v\n", closeSocketErr)
		}
	}()

	for {
		wsMsgType, wsMsg, err := ws.ReadMessage()
		if err != nil {
			// Includes ordinary client disconnects; stop serving this socket.
			fmt.Printf("websocket: read failed: %v\n", err)
			return
		}
		fmt.Printf("Message Type: %d, Message: %s\n", wsMsgType, string(wsMsg))

		if wsMsgType != websocket.TextMessage {
			fmt.Printf("websocket: message type %d is not text\n", wsMsgType)
			return
		}

		// First pass: only look at the envelope's "type" field.
		msgType := struct {
			Type string `json:"type"`
		}{}
		if err := json.Unmarshal(wsMsg, &msgType); err != nil {
			fmt.Printf("websocket: invalid message envelope: %v\n", err)
			return
		}

		if msgType.Type != "identify" {
			fmt.Printf("websocket: message type %q is not identify\n", msgType.Type)
			return
		}

		// Second pass: decode the payload now that the type is known.
		identifyMessage := struct {
			Payload gocrawlhq.IdentifyMessage `json:"payload"`
		}{}
		if err := json.Unmarshal(wsMsg, &identifyMessage); err != nil {
			fmt.Printf("websocket: invalid identify payload: %v\n", err)
			return
		}

		fmt.Printf("Identify Message: %+v\n", identifyMessage)

		err = ws.WriteJSON(struct {
			Reply string `json:"reply"`
		}{
			Reply: "Echo...",
		})
		if err != nil {
			fmt.Printf("websocket: write failed: %v\n", err)
			return
		}
	}
}

// FeedRequest is the JSON shape of a feed request, carrying a "size" and a
// "strategy" field.
//
// NOTE(review): no handler visible in this file decodes this type yet;
// presumably it is meant for the /feed endpoint — confirm against callers.
type FeedRequest struct {
	// Size is serialized as "size".
	Size int `json:"size"`
	// Strategy is serialized as "strategy".
	Strategy string `json:"strategy"`
}

var (
	// MONGODB_URI is the MongoDB connection string, read once from the
	// MONGODB_URI environment variable when the package is initialized.
	MONGODB_URI = os.Getenv("MONGODB_URI")

	// mongoClient is the package-wide MongoDB client; it is assigned by
	// connect_to_mongodb after a successful connect and ping.
	mongoClient *mongo.Client
)

// connect_to_mongodb connects to the server named by MONGODB_URI, verifies
// the connection with a ping, and stores the client in mongoClient.
//
// Progress is printed to stdout; only the length of the URI is printed, not
// its contents. The function panics if connecting or pinging fails.
func connect_to_mongodb() {
	fmt.Println("Connecting to MongoDB...")
	fmt.Println("MONGODB_URI: len=", len(MONGODB_URI))

	clientOpts := options.Client().
		ApplyURI(MONGODB_URI).
		SetServerAPIOptions(options.ServerAPI(options.ServerAPIVersion1)).
		SetAppName("altcrawlhq").
		SetCompressors([]string{"zstd", "zlib", "snappy"})
	fmt.Println("AppName: ", *clientOpts.AppName)
	fmt.Println("Compressors: ", clientOpts.Compressors)

	// No deadline is attached to either call.
	ctx := context.TODO()

	client, err := mongo.Connect(ctx, clientOpts)
	if err != nil {
		panic(err)
	}
	if err := client.Ping(ctx, nil); err != nil {
		panic(err)
	}

	mongoClient = client
	fmt.Println("Connected to MongoDB!")
}

// init establishes the MongoDB connection when the package is loaded.
//
// NOTE(review): because connect_to_mongodb panics on failure, merely
// importing this package requires a reachable MongoDB; consider moving the
// call into an explicit start-up path.
func init() {
	connect_to_mongodb()
}

// ServeHTTP builds the gin router, registers all routes, and runs the server.
//
// Routes:
//
//	GET  /                                 -> 404 JSON after a one-second delay
//	GET  /api/project/:project/feed        -> feedHandler
//	POST /api/project/:project/finished    -> finishHandler
//	POST /api/project/:project/discovered  -> discoveredHandler
//	GET  /api/ws                           -> websocketHandler
//
// Run is called without an explicit address, so gin's default address
// resolution applies. The function panics if Run returns an error.
func ServeHTTP() {
	router := gin.New()
	// Recovery middleware and trusted-proxy configuration are currently
	// disabled:
	// router.Use(gin.Recovery())
	// err := router.SetTrustedProxies(nil)
	// if err != nil {
	// panic(err)
	// }

	router.GET("/", func(c *gin.Context) {
		time.Sleep(1 * time.Second)
		c.JSON(http.StatusNotFound, gin.H{"error": "Not Found"})
	})

	api := router.Group("/api")
	perProject := api.Group("/project/:project/")

	perProject.GET("/feed", feedHandler)
	perProject.POST("/finished", finishHandler)
	perProject.POST("/discovered", discoveredHandler)

	api.GET("/ws", websocketHandler)

	if runErr := router.Run(); runErr != nil {
		panic(runErr)
	}
}
7 changes: 7 additions & 0 deletions cmd/crawlhq_server/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package main

import altcrawlhqserver "github.com/saveweb/altcrawlhq_server"

// main is the entry point of the crawlhq_server command; it hands control to
// altcrawlhqserver.ServeHTTP.
func main() {
	altcrawlhqserver.ServeHTTP()
}
32 changes: 32 additions & 0 deletions discovered.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package altcrawlhqserver

import (
"fmt"
"net/http"

"git.archive.org/wb/gocrawlhq"
"github.com/gin-gonic/gin"
)

// discoveredHandler handles POST /api/project/:project/discovered.
//
// Responses:
//   - 401 with {"error":"Unauthorized"} when isAuthorized rejects the request;
//   - 400 with {"error": <message>} when the body cannot be bound to
//     gocrawlhq.DiscoveredPayload;
//   - 201 with a gocrawlhq.DiscoveredResponse carrying the project name
//     otherwise.
//
// The payload is currently only logged; nothing is stored.
func discoveredHandler(c *gin.Context) {
	project := c.Param("project")
	if !isAuthorized(c) {
		c.JSON(http.StatusUnauthorized, gin.H{"error": "Unauthorized"})
		return
	}

	discoveredPayload := gocrawlhq.DiscoveredPayload{}
	// ShouldBindJSON only returns the error. BindJSON would additionally abort
	// the context and write the 400 header itself, so the JSON error response
	// below could no longer set its own headers.
	if err := c.ShouldBindJSON(&discoveredPayload); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	fmt.Printf("Discovered: %v\n", discoveredPayload)

	// TODO: discoveredPayload.SeencheckOnly means the request is only a
	// seen-check inspection; it should be evaluated first.

	discoveredResp := gocrawlhq.DiscoveredResponse{
		Project: project,
	}
	c.JSON(http.StatusCreated, discoveredResp)
}
37 changes: 37 additions & 0 deletions feed.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package altcrawlhqserver

import (
"net/http"

"git.archive.org/wb/gocrawlhq"
"github.com/gin-gonic/gin"
)

// feedHandler handles GET /api/project/:project/feed.
//
// Unauthorized requests get a 401 JSON error. Otherwise the handler answers
// with a gocrawlhq.FeedResponse holding the project name and the URL list,
// or with status 204 when the list is empty. The list is currently a single
// hard-coded URL, so the empty branch is not reached yet.
func feedHandler(c *gin.Context) {
	project := c.Param("project")
	if !isAuthorized(c) {
		c.JSON(http.StatusUnauthorized, gin.H{"error": "Unauthorized"})
		return
	}

	URLs := []gocrawlhq.URL{
		{
			ID:    "", // UUID; when empty the client generates one itself
			Value: "https://blog.othing.xyz/",
			Path:  "", // the number of "L" characters gives the hop depth
			Via:   "", // the link this URL was discovered from
		},
	}

	// Nothing to hand out: reply with "no content".
	if len(URLs) == 0 {
		c.JSON(http.StatusNoContent, gin.H{"error": "No URLs"})
		return
	}

	c.JSON(http.StatusOK, gocrawlhq.FeedResponse{
		Project: project,
		URLs:    URLs,
	})
}
28 changes: 28 additions & 0 deletions finish.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package altcrawlhqserver

import (
"fmt"
"net/http"

"git.archive.org/wb/gocrawlhq"
"github.com/gin-gonic/gin"
)

// finishHandler handles POST /api/project/:project/finished.
//
// Responses:
//   - 401 with {"error":"Unauthorized"} when isAuthorized rejects the request;
//   - 400 with {"error": <message>} when the body cannot be bound to
//     gocrawlhq.FinishedPayload;
//   - 200 with a gocrawlhq.FinishedResponse carrying the project name
//     otherwise.
//
// The payload is currently only logged; nothing is stored.
func finishHandler(c *gin.Context) {
	project := c.Param("project")
	if !isAuthorized(c) {
		c.JSON(http.StatusUnauthorized, gin.H{"error": "Unauthorized"})
		return
	}

	finishedPayload := gocrawlhq.FinishedPayload{}
	// ShouldBindJSON only returns the error. BindJSON would additionally abort
	// the context and write the 400 header itself, so the JSON error response
	// below could no longer set its own headers.
	if err := c.ShouldBindJSON(&finishedPayload); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	fmt.Printf("Finished: %v\n", finishedPayload)

	finishedResp := gocrawlhq.FinishedResponse{
		Project: project,
	}
	c.JSON(http.StatusOK, finishedResp)
}
50 changes: 50 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
module github.com/saveweb/altcrawlhq_server

go 1.22.6

require (
git.archive.org/wb/gocrawlhq v1.2.5
github.com/gin-gonic/gin v1.10.0
github.com/gorilla/websocket v1.5.3
go.mongodb.org/mongo-driver v1.16.1
)

require (
github.com/bytedance/sonic v1.12.1 // indirect
github.com/bytedance/sonic/loader v0.2.0 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.5 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.22.0 // indirect
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.4.0 // indirect
github.com/goccy/go-json v0.10.3 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.13.6 // indirect
github.com/klauspost/cpuid/v2 v2.2.8 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/montanaflynn/stats v0.7.1 // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
github.com/xdg-go/scram v1.1.2 // indirect
github.com/xdg-go/stringprep v1.0.4 // indirect
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
golang.org/x/arch v0.9.0 // indirect
golang.org/x/crypto v0.26.0 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/text v0.17.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading

0 comments on commit 8515dc5

Please sign in to comment.