diff --git a/.github/workflows/debian-cache.yml b/.github/workflows/debian-cache.yml index 17a1b8b..7ef1030 100644 --- a/.github/workflows/debian-cache.yml +++ b/.github/workflows/debian-cache.yml @@ -68,9 +68,7 @@ jobs: def get_kernels() -> bool: status, output = getstatusoutput( - "./out-of-tree --log-level=warn " - "distro debian get-deb " - "--ignore-cached --max=16" + "./out-of-tree distro debian fetch --max=16" ) logging.info(output) return status == 0 diff --git a/distro.go b/distro.go index 394080c..c31df62 100644 --- a/distro.go +++ b/distro.go @@ -1,11 +1,14 @@ package main import ( + "context" "os" "path/filepath" "regexp" + "time" "github.com/cavaliergopher/grab/v3" + "github.com/remeh/sizedwaitgroup" "github.com/rs/zerolog/log" "code.dumpstack.io/tools/out-of-tree/cache" @@ -20,8 +23,8 @@ type DistroCmd struct { } type DebianCmd struct { - Cache DebianCacheCmd `cmd:"" help:"populate cache"` - GetDeb DebianGetDebCmd `cmd:"" help:"download deb packages"` + Cache DebianCacheCmd `cmd:"" help:"populate cache"` + Fetch DebianFetchCmd `cmd:"" help:"download deb packages"` } type DebianCacheCmd struct { @@ -47,16 +50,81 @@ func (cmd *DebianCacheCmd) Run() (err error) { return } -type DebianGetDebCmd struct { +type DebianFetchCmd struct { Path string `help:"path to download directory" type:"existingdir" default:"./"` Regexp string `help:"match deb pkg names by regexp" default:".*"` - IgnoreCached bool `help:"ignore packages found on remote mirror"` + IgnoreMirror bool `help:"ignore check if packages on the mirror"` Max int `help:"do not download more than X" default:"100500"` + + Threads int `help:"parallel download threads" default:"8"` + + Timeout time.Duration `help:"timeout for each download" default:"1m"` + + swg sizedwaitgroup.SizedWaitGroup + hasResults bool } -func (cmd DebianGetDebCmd) Run() (err error) { +func (cmd *DebianFetchCmd) fetch(pkg snapshot.Package) { + flog := log.With(). + Str("pkg", pkg.Deb.Name). + Logger() + + defer cmd.swg.Done() + + if !cmd.IgnoreMirror { + flog.Debug().Msg("check mirror") + found, _ := cache.PackageURL(config.Debian, pkg.Deb.URL) + if found { + flog.Info().Msg("found on the mirror") + return + } + } + + target := filepath.Join(cmd.Path, filepath.Base(pkg.Deb.URL)) + + if fs.PathExists(target) { + flog.Debug().Msg("already exists") + return + } + + tmp, err := os.MkdirTemp(cmd.Path, "tmp-") + if err != nil { + flog.Fatal().Err(err).Msg("mkdir") + return + } + defer os.RemoveAll(tmp) + + flog.Info().Msg("fetch") + flog.Debug().Msg(pkg.Deb.URL) + + ctx, cancel := context.WithTimeout(context.Background(), cmd.Timeout) + defer cancel() + + req, err := grab.NewRequest(tmp, pkg.Deb.URL) + if err != nil { + flog.Warn().Err(err).Msg("cannot create request") + return + } + req = req.WithContext(ctx) + + resp := grab.DefaultClient.Do(req) + if err := resp.Err(); err != nil { + flog.Warn().Err(err).Msg("request cancelled") + return + } + + err = os.Rename(resp.Filename, target) + if err != nil { + flog.Fatal().Err(err).Msg("mv") + } + + cmd.hasResults = true + cmd.Max-- +} + +func (cmd *DebianFetchCmd) Run() (err error) { re, err := regexp.Compile(cmd.Regexp) if err != nil { log.Fatal().Err(err).Msg("regexp") @@ -79,54 +147,18 @@ func (cmd DebianGetDebCmd) Run() (err error) { } } - tmp, err := os.MkdirTemp(cmd.Path, "tmp-") - if err != nil { - return - } - defer os.RemoveAll(tmp) - - hasresults := false - + cmd.swg = sizedwaitgroup.New(cmd.Threads) for _, pkg := range packages { if cmd.Max <= 0 { break } - if cmd.IgnoreCached { - log.Debug().Msgf("check cache for %s", pkg.Deb.Name) - found, _ := cache.PackageURL(config.Debian, pkg.Deb.URL) - if found { - log.Debug().Msgf("%s already cached", pkg.Deb.Name) - continue - } - } - - target := filepath.Join(cmd.Path, filepath.Base(pkg.Deb.URL)) - - if fs.PathExists(target) { - log.Info().Msgf("%s already exists", pkg.Deb.URL) - continue - } - - log.Info().Msgf("downloading %s", pkg.Deb.URL) - - resp, err := grab.Get(tmp, pkg.Deb.URL) - if err != nil { - err = nil - log.Warn().Err(err).Msg("download") - continue - } - - err = os.Rename(resp.Filename, target) - if err != nil { - log.Fatal().Err(err).Msg("mv") - } - - hasresults = true - cmd.Max-- + cmd.swg.Add() + go cmd.fetch(pkg) } + cmd.swg.Wait() - if !hasresults { + if !cmd.hasResults { log.Fatal().Msg("no packages found to download") } return