feat: parallel download of deb packages
This commit is contained in:
		
							
								
								
									
										4
									
								
								.github/workflows/debian-cache.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/debian-cache.yml
									
									
									
									
										vendored
									
									
								
							| @@ -68,9 +68,7 @@ jobs: | ||||
|  | ||||
|         def get_kernels() -> bool: | ||||
|             status, output = getstatusoutput( | ||||
|                 "./out-of-tree --log-level=warn " | ||||
|                 "distro debian get-deb " | ||||
|                 "--ignore-cached --max=16" | ||||
|                 "./out-of-tree distro debian fetch --max=16" | ||||
|             ) | ||||
|             logging.info(output) | ||||
|             return status == 0 | ||||
|   | ||||
							
								
								
									
										124
									
								
								distro.go
									
									
									
									
									
								
							
							
						
						
									
										124
									
								
								distro.go
									
									
									
									
									
								
							| @@ -1,11 +1,14 @@ | ||||
| package main | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"regexp" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/cavaliergopher/grab/v3" | ||||
| 	"github.com/remeh/sizedwaitgroup" | ||||
| 	"github.com/rs/zerolog/log" | ||||
|  | ||||
| 	"code.dumpstack.io/tools/out-of-tree/cache" | ||||
| @@ -20,8 +23,8 @@ type DistroCmd struct { | ||||
| } | ||||
|  | ||||
| type DebianCmd struct { | ||||
| 	Cache  DebianCacheCmd  `cmd:"" help:"populate cache"` | ||||
| 	GetDeb DebianGetDebCmd `cmd:"" help:"download deb packages"` | ||||
| 	Cache DebianCacheCmd `cmd:"" help:"populate cache"` | ||||
| 	Fetch DebianFetchCmd `cmd:"" help:"download deb packages"` | ||||
| } | ||||
|  | ||||
| type DebianCacheCmd struct { | ||||
| @@ -47,16 +50,81 @@ func (cmd *DebianCacheCmd) Run() (err error) { | ||||
| 	return | ||||
| } | ||||
|  | ||||
| type DebianGetDebCmd struct { | ||||
| type DebianFetchCmd struct { | ||||
| 	Path   string `help:"path to download directory" type:"existingdir" default:"./"` | ||||
| 	Regexp string `help:"match deb pkg names by regexp" default:".*"` | ||||
|  | ||||
| 	IgnoreCached bool `help:"ignore packages found on remote mirror"` | ||||
| 	IgnoreMirror bool `help:"ignore check if packages on the mirror"` | ||||
|  | ||||
| 	Max int `help:"do not download more than X" default:"100500"` | ||||
|  | ||||
| 	Threads int `help:"parallel download threads" default:"8"` | ||||
|  | ||||
| 	Timeout time.Duration `help:"timeout for each download" default:"1m"` | ||||
|  | ||||
| 	swg        sizedwaitgroup.SizedWaitGroup | ||||
| 	hasResults bool | ||||
| } | ||||
|  | ||||
| func (cmd DebianGetDebCmd) Run() (err error) { | ||||
| func (cmd *DebianFetchCmd) fetch(pkg snapshot.Package) { | ||||
| 	flog := log.With(). | ||||
| 		Str("pkg", pkg.Deb.Name). | ||||
| 		Logger() | ||||
|  | ||||
| 	defer cmd.swg.Done() | ||||
|  | ||||
| 	if !cmd.IgnoreMirror { | ||||
| 		flog.Debug().Msg("check mirror") | ||||
| 		found, _ := cache.PackageURL(config.Debian, pkg.Deb.URL) | ||||
| 		if found { | ||||
| 			flog.Info().Msg("found on the mirror") | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	target := filepath.Join(cmd.Path, filepath.Base(pkg.Deb.URL)) | ||||
|  | ||||
| 	if fs.PathExists(target) { | ||||
| 		flog.Debug().Msg("already exists") | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	tmp, err := os.MkdirTemp(cmd.Path, "tmp-") | ||||
| 	if err != nil { | ||||
| 		flog.Fatal().Err(err).Msg("mkdir") | ||||
| 		return | ||||
| 	} | ||||
| 	defer os.RemoveAll(tmp) | ||||
|  | ||||
| 	flog.Info().Msg("fetch") | ||||
| 	flog.Debug().Msg(pkg.Deb.URL) | ||||
|  | ||||
| 	ctx, cancel := context.WithTimeout(context.Background(), cmd.Timeout) | ||||
| 	defer cancel() | ||||
|  | ||||
| 	req, err := grab.NewRequest(tmp, pkg.Deb.URL) | ||||
| 	if err != nil { | ||||
| 		flog.Warn().Err(err).Msg("cannot create request") | ||||
| 		return | ||||
| 	} | ||||
| 	req = req.WithContext(ctx) | ||||
|  | ||||
| 	resp := grab.DefaultClient.Do(req) | ||||
| 	if err := resp.Err(); err != nil { | ||||
| 		flog.Warn().Err(err).Msg("request cancelled") | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	err = os.Rename(resp.Filename, target) | ||||
| 	if err != nil { | ||||
| 		flog.Fatal().Err(err).Msg("mv") | ||||
| 	} | ||||
|  | ||||
| 	cmd.hasResults = true | ||||
| 	cmd.Max-- | ||||
| } | ||||
|  | ||||
| func (cmd *DebianFetchCmd) Run() (err error) { | ||||
| 	re, err := regexp.Compile(cmd.Regexp) | ||||
| 	if err != nil { | ||||
| 		log.Fatal().Err(err).Msg("regexp") | ||||
| @@ -79,54 +147,18 @@ func (cmd DebianGetDebCmd) Run() (err error) { | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	tmp, err := os.MkdirTemp(cmd.Path, "tmp-") | ||||
| 	if err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	defer os.RemoveAll(tmp) | ||||
|  | ||||
| 	hasresults := false | ||||
|  | ||||
| 	cmd.swg = sizedwaitgroup.New(cmd.Threads) | ||||
| 	for _, pkg := range packages { | ||||
| 		if cmd.Max <= 0 { | ||||
| 			break | ||||
| 		} | ||||
|  | ||||
| 		if cmd.IgnoreCached { | ||||
| 			log.Debug().Msgf("check cache for %s", pkg.Deb.Name) | ||||
| 			found, _ := cache.PackageURL(config.Debian, pkg.Deb.URL) | ||||
| 			if found { | ||||
| 				log.Debug().Msgf("%s already cached", pkg.Deb.Name) | ||||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		target := filepath.Join(cmd.Path, filepath.Base(pkg.Deb.URL)) | ||||
|  | ||||
| 		if fs.PathExists(target) { | ||||
| 			log.Info().Msgf("%s already exists", pkg.Deb.URL) | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		log.Info().Msgf("downloading %s", pkg.Deb.URL) | ||||
|  | ||||
| 		resp, err := grab.Get(tmp, pkg.Deb.URL) | ||||
| 		if err != nil { | ||||
| 			err = nil | ||||
| 			log.Warn().Err(err).Msg("download") | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		err = os.Rename(resp.Filename, target) | ||||
| 		if err != nil { | ||||
| 			log.Fatal().Err(err).Msg("mv") | ||||
| 		} | ||||
|  | ||||
| 		hasresults = true | ||||
| 		cmd.Max-- | ||||
| 		cmd.swg.Add() | ||||
| 		go cmd.fetch(pkg) | ||||
| 	} | ||||
| 	cmd.swg.Wait() | ||||
|  | ||||
| 	if !hasresults { | ||||
| 	if !cmd.hasResults { | ||||
| 		log.Fatal().Msg("no packages found to download") | ||||
| 	} | ||||
| 	return | ||||
|   | ||||
		Reference in New Issue
	
	Block a user