diff --git a/distro/debian/snapshot/snapshot.go b/distro/debian/snapshot/snapshot.go index 8e2eaa9..3a3f606 100644 --- a/distro/debian/snapshot/snapshot.go +++ b/distro/debian/snapshot/snapshot.go @@ -1,17 +1,37 @@ package snapshot import ( + "bytes" + "compress/bzip2" + "compress/gzip" + "context" "errors" "fmt" + "io" + "net/http" "net/url" + "os" "regexp" "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/rs/zerolog/log" + "github.com/ulikunitz/xz" + "golang.org/x/time/rate" "code.dumpstack.io/tools/out-of-tree/distro/debian/snapshot/mr" ) const URL = "https://snapshot.debian.org" +var Limiter = rate.NewLimiter(rate.Every(time.Second), 1) + +// Retries in case of 5xx errors +var Retries = 10 + +var HttpTimeout = time.Second * 5 + func SourcePackageVersions(name string) (versions []string, err error) { pkg, err := mr.GetPackage(name) if err != nil { @@ -40,6 +60,7 @@ type Package struct { Snapshot string Archive string + Codename string Component string } } @@ -78,6 +99,11 @@ func NewPackage(name, srcname, version, arch string) (p Package, err error) { } p.Repo.Component = split[2] + p.Repo.Codename, err = p.getCodename() + if err != nil { + return + } + return } @@ -94,7 +120,157 @@ func (p Package) getHash() (hash string, err error) { } } - err = errors.New("not found") + err = errors.New("hash not found") + return +} + +func (p Package) getCodename() (dist string, err error) { + dists, err := p.dists() + if err != nil { + return + } + + for _, dist = range dists { + var distHasPackage bool + distHasPackage, err = p.isDistHasPackage(dist) + if err != nil { + return + } + if distHasPackage { + return + } + } + + err = errors.New("codename not found") + return +} + +func (p Package) dists() (dists []string, err error) { + query, err := url.JoinPath(URL, "archive", p.Repo.Archive, + p.Repo.Snapshot, "dists/") + if err != nil { + return + } + + resp, err := httpGetWithRetry(query) + if err != nil { + return + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + err = fmt.Errorf("%d (%s)", resp.StatusCode, query) + return + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return + } + + doc.Find("table tr").Each(func(i int, s *goquery.Selection) { + html, err := s.Html() + if err != nil { + return + } + if !strings.Contains(html, "d 0; i-- { + flog.Trace().Msg("wait") + Limiter.Wait(context.Background()) + + client := http.Client{Timeout: HttpTimeout} + + flog.Trace().Msg("start") + resp, err = client.Get(query) + if err != nil { + if os.IsTimeout(err) { + flog.Debug().Msgf("timeout; retry (%d left)", i) + continue + } + flog.Error().Err(err).Msg("") + return + } + flog.Debug().Msgf("%s", resp.Status) + + if resp.StatusCode < 500 { + break + } + + resp.Body.Close() + flog.Debug().Msgf("retry (%d left)", i) + } + + if resp.StatusCode >= 400 { + err = fmt.Errorf("%d (%s)", resp.StatusCode, query) + } return }