Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] On-demand filelist fetching #1719

Open
wants to merge 52 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
22e1c7e
Update stats.go
Kiloutre Nov 7, 2017
f226f1a
Update stats.go
Kiloutre Nov 7, 2017
89fd690
Update stats.go
Kiloutre Nov 7, 2017
58c6d09
add loading_file_list
Kiloutre Nov 7, 2017
a740a23
Update CHANGELOG.md
Kiloutre Nov 7, 2017
68da680
Update view.jet.html
Kiloutre Nov 7, 2017
1b1fd28
prevent filelist from being loaded
Kiloutre Nov 7, 2017
8e3d53c
Update view.jet.html
Kiloutre Nov 7, 2017
c3cb627
Update stats.go
Kiloutre Nov 7, 2017
557047c
Update view.jet.html
Kiloutre Nov 7, 2017
11567bf
Update view.jet.html
Kiloutre Nov 8, 2017
ef30870
Update torrent.go
Kiloutre Nov 8, 2017
1219e69
Update torrent.go
Kiloutre Nov 8, 2017
2087c27
Merge branch 'dev' into filelist-fetching
Kiloutre Nov 8, 2017
ceb02d1
Update file.go
Kiloutre Nov 8, 2017
ed8cf8c
Update stats.go
Kiloutre Nov 8, 2017
b6b1414
Update torrent.go
Kiloutre Nov 8, 2017
1d8a324
constantly preload filelists
Kiloutre Nov 8, 2017
d3480ad
Update treeview.jet.html
Kiloutre Nov 8, 2017
479eb02
Update treeview.jet.html
Kiloutre Nov 8, 2017
49ead03
Update stats.go
Kiloutre Nov 8, 2017
77e602b
Update router.go
Kiloutre Nov 8, 2017
8f28efc
Add files via upload
Kiloutre Nov 8, 2017
c1658bc
Update template.go
Kiloutre Nov 8, 2017
e703dea
Update template_test.go
Kiloutre Nov 8, 2017
ef49289
Add files via upload
Kiloutre Nov 8, 2017
18000fb
Update CHANGELOG.md
Kiloutre Nov 8, 2017
bd8a8b7
Update en-us.all.json
Kiloutre Nov 8, 2017
9f7ee65
Update filelist.jet.html
Kiloutre Nov 8, 2017
ba503c8
Update en-us.all.json
Kiloutre Nov 8, 2017
1d90dcf
Update CHANGELOG.md
Kiloutre Nov 8, 2017
9ffb5f1
Update view.jet.html
Kiloutre Nov 8, 2017
cb8604d
failsafe for empty names
Kiloutre Nov 8, 2017
08b6984
Merge branch 'dev' into filelist-fetching
Kiloutre Nov 8, 2017
c943839
remove "
Kiloutre Nov 8, 2017
1c82814
Update en-us.all.json
Kiloutre Nov 8, 2017
95daf79
Update en-us.all.json
Kiloutre Nov 8, 2017
4bd364e
Merge branch 'dev' into filelist-fetching
Kiloutre Nov 9, 2017
95cc7fd
ignore http trackers
Kiloutre Nov 10, 2017
43b16ed
Merge branch 'dev' into filelist-fetching
Kiloutre Nov 16, 2017
88a4966
fix travis
Kiloutre Nov 16, 2017
52050b2
fix travis
Kiloutre Nov 16, 2017
0cf75e3
Merge branch 'dev' into filelist-fetching
Kiloutre Nov 24, 2017
01865e8
fix missing <script>
Kiloutre Nov 24, 2017
239b953
Use client port defined in config file
Kiloutre Nov 24, 2017
c7ae1cf
Add FilesFetchingClientPort
Kiloutre Nov 24, 2017
b05f3b8
Add FilesFetchingClientPort
Kiloutre Nov 24, 2017
eb29745
Update filesize even if filelist is empty
Kiloutre Nov 24, 2017
81ef4b9
Update files.go
Kiloutre Nov 24, 2017
ec3858e
Update stats.go
Kiloutre Nov 24, 2017
1f2e85c
Merge branch 'dev' into filelist-fetching
ewhal Jan 3, 2018
050d00d
Merge branch 'dev' into filelist-fetching
Kiloutre Jan 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions config/default_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ torrents:
torrents:
# GenerationClientPort : Port used by the torrent client created during torrent generation
generation_client_port: 50006
# FilesFetchingClientPort : Port used by the torrent client created for on-demand file list fetching
files_fetching_client_port: 50005
# FileStorage : Location of folder that will contain generated torrent files
filestorage: ./downloads/
# TorrentStorageLink : Url of torrent file download location (eg https://your.site/somewhere/%s)
Expand Down
1 change: 1 addition & 0 deletions config/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ type TorrentsConfig struct {
Sort string `yaml:"sort,omitempty"`
Tags Tags `yaml:"tags,flow,omitempty"`
GenerationClientPort int `yaml:"generation_client_port,flow,omitempty"`
FilesFetchingClientPort int `yaml:"files_fetching_client_port,flow,omitempty"`
}

// UploadConfig : Config struct for uploading torrents
Expand Down
78 changes: 78 additions & 0 deletions controllers/torrent/files.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package torrentController

import (
"html/template"
"encoding/hex"
"net/http"
"strings"
"strconv"

"github.com/NyaaPantsu/nyaa/models/torrents"
"github.com/NyaaPantsu/nyaa/models"
"github.com/NyaaPantsu/nyaa/templates"
"github.com/NyaaPantsu/nyaa/utils/format"
"github.com/NyaaPantsu/nyaa/utils/filelist"
"github.com/Stephen304/goscrape"
"github.com/gin-gonic/gin"
)

// GetFilesHandler : Controller rendering the file list of the torrent whose id is given in the URL
//
// Responds with 404 when the ":id" parameter is not a valid unsigned 32-bit
// integer or when torrents.FindByID returns an error. When the torrent has no
// stored file list, ScrapeFiles is called before rendering; the request waits
// for that call to return.
func GetFilesHandler(c *gin.Context) {
	// ParseUint rejects non-numeric and negative ids up front, instead of letting
	// a failed or negative parse be converted to an unrelated uint.
	id, err := strconv.ParseUint(c.Param("id"), 10, 32)
	if err != nil {
		c.Status(http.StatusNotFound)
		return
	}

	torrent, err := torrents.FindByID(uint(id))
	if err != nil {
		c.Status(http.StatusNotFound)
		return
	}

	if len(torrent.FileList) == 0 {
		var blankScrape models.Scrape
		magnet := format.InfoHashToMagnet(strings.TrimSpace(torrent.Hash), torrent.Name, GetTorrentTrackers(torrent)...)
		// Best effort: if scraping fails the page is still rendered with whatever
		// file list the torrent currently has, so both results are discarded on purpose.
		// NOTE(review): a blank Scrape is passed together with statsExists=true — confirm
		// this cannot overwrite previously stored stats with empty ones.
		_, _ = ScrapeFiles(magnet, torrent, blankScrape, true)
	}

	folder := filelist.FileListToFolder(torrent.FileList, "root")
	templates.TorrentFileList(c, torrent.ToJSON(), folder)
}

// ScrapeFiles : Scrape torrent files
func ScrapeFiles(magnet string, torrent *models.Torrent, currentStats models.Scrape, statsExists bool) (error, []FileJSON) {
if client == nil {
err := initClient()
if err != nil {
return err, []FileJSON{}
}
}

t, _ := client.AddMagnet(magnet)
<-t.GotInfo()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's only for the filelist, you should first check whether you already have the torrent file and, if you do, load the files from it (that way you don't have to wait to connect to peers to get them). You can see how I access a torrent file's information in the upload process here, where tfile is the content of the torrent file:

torrent, err := metainfo.Load(tfile)

Conversely, if you don't have it, you should save it after scraping so that you save resources.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might also be better to do it in a go func() when you try to scrape, so you don't keep the connection between the user and the server hanging. It might take a while before the scraper connects to a peer and gets the information.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That in itself is not a problem, since the torrent page load is not affected by this. The filelist fetching is generally done with JS; if the user doesn't have JS, a /files/:torrentid page is loaded instead, but the user will know full well that if that page takes time to load, it's because it's loading the filelist.
So it's not really necessary here.


infoHash := t.InfoHash()
dst := make([]byte, hex.EncodedLen(len(t.InfoHash())))
hex.Encode(dst, infoHash[:])

var UDP []string

for _, tracker := range t.Metainfo().AnnounceList[0] {
if strings.HasPrefix(tracker, "udp") {
UDP = append(UDP, tracker)
}
}
var results goscrape.Result
if len(UDP) != 0 {
udpscrape := goscrape.NewBulk(UDP)
results = udpscrape.ScrapeBulk([]string{torrent.Hash})[0]
}
t.Drop()
return nil, UpdateTorrentStats(torrent, results, currentStats, t.Files(), statsExists)
}

// FileJSON : JSON representation of a single file of a torrent
type FileJSON struct {
	// Path is serialized under the "path" key.
	Path string `json:"path"`
	// Filesize is serialized under the "filesize" key.
	Filesize template.HTML `json:"filesize"`
}

// fileSize passes filesize through format.FileSize and wraps the result as template.HTML
func fileSize(filesize int64) template.HTML {
	formatted := format.FileSize(filesize)
	return template.HTML(formatted)
}
1 change: 1 addition & 0 deletions controllers/torrent/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
func init() {
router.Get().Any("/download/:hash", DownloadTorrent)
router.Get().Any("/stats/:id", GetStatsHandler)
router.Get().Any("/files/:id", GetFilesHandler)

torrentRoutes := router.Get().Group("/torrent", middlewares.LoggedInMiddleware())
{
Expand Down
144 changes: 114 additions & 30 deletions controllers/torrent/stats.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package torrentController

import (
"path/filepath"
"strconv"
"strings"
"net/url"
Expand All @@ -9,93 +10,176 @@ import (
"github.com/NyaaPantsu/nyaa/models/torrents"
"github.com/NyaaPantsu/nyaa/models"
"github.com/NyaaPantsu/nyaa/config"
"github.com/NyaaPantsu/nyaa/utils/log"
"github.com/NyaaPantsu/nyaa/utils/format"
"github.com/Stephen304/goscrape"
"github.com/gin-gonic/gin"

"github.com/anacrolix/dht"
"github.com/anacrolix/torrent"
"github.com/bradfitz/slice"
)

var client *torrent.Client

func initClient() error {
clientConfig := torrent.Config{
DHTConfig: dht.ServerConfig{
StartingNodes: dht.GlobalBootstrapAddrs,
},
ListenAddr: ":" + strconv.Itoa(config.Get().Torrents.FilesFetchingClientPort),
}
cl, err := torrent.NewClient(&clientConfig)
if err != nil {
log.Errorf("error creating client: %s", err)
return err
}
client = cl
return nil
}

// GetStatsHandler : Controller for getting torrent stats
func GetStatsHandler(c *gin.Context) {
id, err := strconv.ParseInt(c.Param("id"), 10, 32)
if err != nil {
return
}

torrent, err := torrents.FindRawByID(uint(id))

updateTorrent, err := torrents.FindByID(uint(id))
if err != nil {
return
}

var CurrentData models.Scrape
statsExists := !(models.ORM.Where("torrent_id = ?", id).Find(&CurrentData).RecordNotFound())

if statsExists {
if statsExists && c.Request.URL.Query()["files"] == nil {
//Stats already exist, we check if the torrent stats have been scraped already very recently and if so, we stop there to avoid abuse of the /stats/:id route
if (CurrentData.Seeders == 0 && CurrentData.Leechers == 0 && CurrentData.Completed == 0) && time.Since(CurrentData.LastScrape).Minutes() <= config.Get().Scrape.MaxStatScrapingFrequencyUnknown {
if isEmptyScrape(CurrentData) && time.Since(CurrentData.LastScrape).Minutes() <= config.Get().Scrape.MaxStatScrapingFrequencyUnknown {
//Unknown stats but has been scraped less than X minutes ago (X being the limit set in the config file)
return
}
if (CurrentData.Seeders != 0 || CurrentData.Leechers != 0 || CurrentData.Completed != 0) && time.Since(CurrentData.LastScrape).Minutes() <= config.Get().Scrape.MaxStatScrapingFrequency {
if !isEmptyScrape(CurrentData) && time.Since(CurrentData.LastScrape).Minutes() <= config.Get().Scrape.MaxStatScrapingFrequency {
//Known stats but has been scraped less than X minutes ago (X being the limit set in the config file)
return
}
}

var Trackers []string
if len(torrent.Trackers) > 3 {
for _, line := range strings.Split(torrent.Trackers[3:], "&tr=") {
tracker, error := url.QueryUnescape(line)
if error == nil && strings.HasPrefix(tracker, "udp") {
Trackers = append(Trackers, tracker)
}
//Cannot scrape from http trackers so don't put them in the array
}
}
Trackers := GetTorrentTrackers(updateTorrent)

for _, tracker := range config.Get().Torrents.Trackers.Default {
if !contains(Trackers, tracker) && strings.HasPrefix(tracker, "udp") {
Trackers = append(Trackers, tracker)
var stats goscrape.Result
var torrentFiles []FileJSON

if c.Request.URL.Query()["files"] != nil {
if len(updateTorrent.FileList) > 0 {
return
}
err, torrentFiles = ScrapeFiles(format.InfoHashToMagnet(strings.TrimSpace(updateTorrent.Hash), updateTorrent.Name, Trackers...), updateTorrent, CurrentData, statsExists)
if err != nil {
return
}
} else {
//Single() returns an array which contain results for each torrent Hash it is fed, since we only feed him one we want to directly access the results
stats = goscrape.Single(Trackers, []string{
updateTorrent.Hash,
})[0]
UpdateTorrentStats(updateTorrent, stats, CurrentData, []torrent.File{}, statsExists)
}

stats := goscrape.Single(Trackers, []string{
torrent.Hash,
})[0]
//Single() returns an array which contain results for each torrent Hash it is fed, since we only feed him one we want to directly access the results


//If we put seeders on -1, the script instantly knows the fetching did not give any result, avoiding having to check all three stats below and in view.jet.html's javascript
if stats.Seeders == 0 && stats.Leechers == 0 && stats.Completed == 0 {
if isEmptyResult(stats) {
stats.Seeders = -1
}

c.JSON(200, gin.H{
"seeders": stats.Seeders,
"leechers": stats.Leechers,
"downloads": stats.Completed,
"filelist": torrentFiles,
"totalsize": fileSize(updateTorrent.Filesize),
})

return
}

// UpdateTorrentStats : Update stats & filelist if files are specified, otherwise just stats
func UpdateTorrentStats(torrent *models.Torrent, stats goscrape.Result, currentStats models.Scrape, Files []torrent.File, statsExists bool) (JSONFilelist []FileJSON) {
if stats.Seeders == -1 {
stats.Seeders = 0
}

if !statsExists {
torrent.Scrape = torrent.Scrape.Create(uint(id), uint32(stats.Seeders), uint32(stats.Leechers), uint32(stats.Completed), time.Now())
//Create entry in the DB because none exist
torrent.Scrape = torrent.Scrape.Create(torrent.ID, uint32(stats.Seeders), uint32(stats.Leechers), uint32(stats.Completed), time.Now())
//Create a stat entry in the DB because none exist
} else {
//Entry in the DB already exists, simply update it
if (CurrentData.Seeders == 0 && CurrentData.Leechers == 0 && CurrentData.Completed == 0) || (stats.Seeders != 0 && stats.Leechers != 0 && stats.Completed != 0 ) {
torrent.Scrape = &models.Scrape{uint(id), uint32(stats.Seeders), uint32(stats.Leechers), uint32(stats.Completed), time.Now()}
if isEmptyScrape(currentStats) || !isEmptyResult(stats) {
torrent.Scrape = &models.Scrape{torrent.ID, uint32(stats.Seeders), uint32(stats.Leechers), uint32(stats.Completed), time.Now()}
} else {
torrent.Scrape = &models.Scrape{uint(id), uint32(CurrentData.Seeders), uint32(CurrentData.Leechers), uint32(CurrentData.Completed), time.Now()}
torrent.Scrape = &models.Scrape{torrent.ID, uint32(currentStats.Seeders), uint32(currentStats.Leechers), uint32(currentStats.Completed), time.Now()}
}
//Only overwrite stats if the old one are Unknown OR if the current ones are not unknown, preventing good stats from being turned into unknown own but allowing good stats to be updated to more reliable ones
//Only overwrite stats if the old one are Unknown OR if the new ones are not unknown, preventing good stats from being turned into unknown but allowing good stats to be updated to more reliable ones
torrent.Scrape.Update(false)
}

if len(Files) > 1 {
files, err := torrent.CreateFileList(Files)

if err != nil {
return
}

JSONFilelist = make([]FileJSON, 0, len(files))
for _, f := range files {
JSONFilelist = append(JSONFilelist, FileJSON{
Path: filepath.Join(f.Path()...),
Filesize: fileSize(f.Filesize),
})
}

// Sort file list by lowercase filename
slice.Sort(JSONFilelist, func(i, j int) bool {
return strings.ToLower(JSONFilelist[i].Path) < strings.ToLower(JSONFilelist[j].Path)
})
} else if len(Files) == 1 {
torrent.Filesize = Files[0].Length()
torrent.Update(false)
}

return
}

// GetTorrentTrackers : Collect the UDP trackers of a torrent, completed with the default UDP trackers
//
// The torrent's Trackers string is read from its fourth byte onwards, split on
// "&tr=" and each piece is query-unescaped. Only entries starting with "udp" are
// kept, because HTTP trackers cannot be scraped. Default trackers from the
// configuration are appended when they start with "udp" and are not already listed.
func GetTorrentTrackers(torrent *models.Torrent) []string {
	var udpTrackers []string

	if raw := torrent.Trackers; len(raw) > 3 {
		for _, escaped := range strings.Split(raw[3:], "&tr=") {
			decoded, err := url.QueryUnescape(escaped)
			if err != nil || !strings.HasPrefix(decoded, "udp") {
				// Undecodable or non-UDP entries are skipped
				continue
			}
			udpTrackers = append(udpTrackers, decoded)
		}
	}

	for _, fallback := range config.Get().Torrents.Trackers.Default {
		if contains(udpTrackers, fallback) || !strings.HasPrefix(fallback, "udp") {
			continue
		}
		udpTrackers = append(udpTrackers, fallback)
	}

	return udpTrackers
}

// isEmptyResult reports whether a scrape result has zero seeders, leechers and completed downloads
func isEmptyResult(stats goscrape.Result) bool {
	if stats.Seeders != 0 || stats.Leechers != 0 {
		return false
	}
	return stats.Completed == 0
}

// isEmptyScrape reports whether a stored scrape has zero seeders, leechers and completed downloads
func isEmptyScrape(stats models.Scrape) bool {
	hasAnyStat := stats.Seeders != 0 || stats.Leechers != 0 || stats.Completed != 0
	return !hasAnyStat
}

func contains(s []string, e string) bool {
for _, a := range s {
if a == e {
Expand Down
11 changes: 10 additions & 1 deletion models/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,18 @@ func (f *File) SetPath(path []string) error {
// Filename : Returns the last element of the file's path, or an empty string when the path is empty
func (f *File) Filename() string {
	segments := f.Path()
	if n := len(segments); n > 0 {
		return segments[n-1]
	}
	return ""
}

// FilenameWithoutExtension : Returns the filename of the file without the extension
func (f *File) FilenameWithoutExtension() string {
path := f.Path()
if len(path) == 0 {
return ""
}
fileName := path[len(path)-1]
index := strings.LastIndex(fileName, ".")

Expand All @@ -71,10 +77,13 @@ func (f *File) FilenameWithoutExtension() string {
// FilenameExtension : Returns the extension of a filename, or an empty string
func (f *File) FilenameExtension() string {
path := f.Path()
if len(path) == 0 {
return ""
}
fileName := path[len(path)-1]
index := strings.LastIndex(fileName, ".")

if index == -1 {
if index == -1 || index+1 == len(fileName){
return ""
}

Expand Down
21 changes: 21 additions & 0 deletions models/torrent.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/NyaaPantsu/nyaa/utils/format"
"github.com/NyaaPantsu/nyaa/utils/log"
"github.com/NyaaPantsu/nyaa/utils/sanitize"
"github.com/anacrolix/torrent"
"github.com/bradfitz/slice"
"github.com/fatih/structs"
)
Expand Down Expand Up @@ -457,6 +458,26 @@ func (t *Torrent) Update(unscope bool) (int, error) {
return http.StatusOK, nil
}

// CreateFileList : Store the given files as the torrent's file list and refresh its total size
//
// Every entry of Files is turned into a File attached to this torrent and
// inserted through ORM. Once all of them are inserted, the torrent's FileList
// and Filesize are replaced and the torrent is saved with Update(false).
//
// On any error an empty list and the error are returned. The torrent's FileList
// and Filesize are only modified after every file has been inserted, so a
// failure while building or inserting a file leaves them untouched. Files
// inserted before the failure are not rolled back.
func (t *Torrent) CreateFileList(Files []torrent.File) ([]File, error) {
	createdFilelist := make([]File, 0, len(Files))
	var totalSize int64

	for _, uploadedFile := range Files {
		file := File{TorrentID: t.ID, Filesize: uploadedFile.Length()}
		if err := file.SetPath(uploadedFile.FileInfo().Path); err != nil {
			return []File{}, err
		}
		// Insert before appending so the stored copy carries whatever ORM.Create
		// fills in on the value, and so a failed insert is reported to the caller.
		if err := ORM.Create(&file).Error; err != nil {
			return []File{}, err
		}
		createdFilelist = append(createdFilelist, file)
		totalSize += uploadedFile.Length()
	}

	t.FileList = createdFilelist
	t.Filesize = totalSize
	if _, err := t.Update(false); err != nil {
		return []File{}, err
	}
	return createdFilelist, nil
}

// UpdateUnscope : Update a torrent based on model
func (t *Torrent) UpdateUnscope() (int, error) {
return t.Update(true)
Expand Down
Loading