1
0
Fork 0

feat: parallel download of deb packages

timestamps
dump_stack() 2023-05-17 09:59:31 +00:00
parent 72f52d3200
commit a68ceacb43
Signed by: dump_stack
GPG Key ID: BE44DA8C062D87DC
2 changed files with 79 additions and 49 deletions

View File

@ -68,9 +68,7 @@ jobs:
def get_kernels() -> bool:
status, output = getstatusoutput(
"./out-of-tree --log-level=warn "
"distro debian get-deb "
"--ignore-cached --max=16"
"./out-of-tree distro debian fetch --max=16"
)
logging.info(output)
return status == 0

124
distro.go
View File

@ -1,11 +1,14 @@
package main
import (
"context"
"os"
"path/filepath"
"regexp"
"time"
"github.com/cavaliergopher/grab/v3"
"github.com/remeh/sizedwaitgroup"
"github.com/rs/zerolog/log"
"code.dumpstack.io/tools/out-of-tree/cache"
@ -20,8 +23,8 @@ type DistroCmd struct {
}
type DebianCmd struct {
Cache DebianCacheCmd `cmd:"" help:"populate cache"`
GetDeb DebianGetDebCmd `cmd:"" help:"download deb packages"`
Cache DebianCacheCmd `cmd:"" help:"populate cache"`
Fetch DebianFetchCmd `cmd:"" help:"download deb packages"`
}
type DebianCacheCmd struct {
@ -47,16 +50,81 @@ func (cmd *DebianCacheCmd) Run() (err error) {
return
}
type DebianGetDebCmd struct {
type DebianFetchCmd struct {
Path string `help:"path to download directory" type:"existingdir" default:"./"`
Regexp string `help:"match deb pkg names by regexp" default:".*"`
IgnoreCached bool `help:"ignore packages found on remote mirror"`
IgnoreMirror bool `help:"ignore check if packages on the mirror"`
Max int `help:"do not download more than X" default:"100500"`
Threads int `help:"parallel download threads" default:"8"`
Timeout time.Duration `help:"timeout for each download" default:"1m"`
swg sizedwaitgroup.SizedWaitGroup
hasResults bool
}
func (cmd DebianGetDebCmd) Run() (err error) {
func (cmd *DebianFetchCmd) fetch(pkg snapshot.Package) {
flog := log.With().
Str("pkg", pkg.Deb.Name).
Logger()
defer cmd.swg.Done()
if !cmd.IgnoreMirror {
flog.Debug().Msg("check mirror")
found, _ := cache.PackageURL(config.Debian, pkg.Deb.URL)
if found {
flog.Info().Msg("found on the mirror")
return
}
}
target := filepath.Join(cmd.Path, filepath.Base(pkg.Deb.URL))
if fs.PathExists(target) {
flog.Debug().Msg("already exists")
return
}
tmp, err := os.MkdirTemp(cmd.Path, "tmp-")
if err != nil {
flog.Fatal().Err(err).Msg("mkdir")
return
}
defer os.RemoveAll(tmp)
flog.Info().Msg("fetch")
flog.Debug().Msg(pkg.Deb.URL)
ctx, cancel := context.WithTimeout(context.Background(), cmd.Timeout)
defer cancel()
req, err := grab.NewRequest(tmp, pkg.Deb.URL)
if err != nil {
flog.Warn().Err(err).Msg("cannot create request")
return
}
req = req.WithContext(ctx)
resp := grab.DefaultClient.Do(req)
if err := resp.Err(); err != nil {
flog.Warn().Err(err).Msg("request cancelled")
return
}
err = os.Rename(resp.Filename, target)
if err != nil {
flog.Fatal().Err(err).Msg("mv")
}
cmd.hasResults = true
cmd.Max--
}
func (cmd *DebianFetchCmd) Run() (err error) {
re, err := regexp.Compile(cmd.Regexp)
if err != nil {
log.Fatal().Err(err).Msg("regexp")
@ -79,54 +147,18 @@ func (cmd DebianGetDebCmd) Run() (err error) {
}
}
tmp, err := os.MkdirTemp(cmd.Path, "tmp-")
if err != nil {
return
}
defer os.RemoveAll(tmp)
hasresults := false
cmd.swg = sizedwaitgroup.New(cmd.Threads)
for _, pkg := range packages {
if cmd.Max <= 0 {
break
}
if cmd.IgnoreCached {
log.Debug().Msgf("check cache for %s", pkg.Deb.Name)
found, _ := cache.PackageURL(config.Debian, pkg.Deb.URL)
if found {
log.Debug().Msgf("%s already cached", pkg.Deb.Name)
continue
}
}
target := filepath.Join(cmd.Path, filepath.Base(pkg.Deb.URL))
if fs.PathExists(target) {
log.Info().Msgf("%s already exists", pkg.Deb.URL)
continue
}
log.Info().Msgf("downloading %s", pkg.Deb.URL)
resp, err := grab.Get(tmp, pkg.Deb.URL)
if err != nil {
err = nil
log.Warn().Err(err).Msg("download")
continue
}
err = os.Rename(resp.Filename, target)
if err != nil {
log.Fatal().Err(err).Msg("mv")
}
hasresults = true
cmd.Max--
cmd.swg.Add()
go cmd.fetch(pkg)
}
cmd.swg.Wait()
if !hasresults {
if !cmd.hasResults {
log.Fatal().Msg("no packages found to download")
}
return