package snapshot import ( "bytes" "compress/bzip2" "compress/gzip" "context" "errors" "fmt" "io" "net/http" "net/url" "os" "regexp" "strings" "time" "github.com/PuerkitoBio/goquery" "github.com/rs/zerolog/log" "github.com/ulikunitz/xz" "golang.org/x/time/rate" "code.dumpstack.io/tools/out-of-tree/distro/debian/snapshot/mr" ) const timeLayout = "20060102T150405Z" const URL = "https://snapshot.debian.org" var Limiter = rate.NewLimiter(rate.Every(time.Second), 1) // Retries in case of 5xx errors var Retries = 10 var HttpTimeout = time.Second * 5 func SourcePackageVersions(name string) (versions []string, err error) { pkg, err := mr.GetPackage(name) if err != nil { return } for _, res := range pkg.Result { versions = append(versions, res.Version) } return } type Package struct { Name string Source string Version string Arch string Deb struct { Name string Hash string URL string } Repo struct { Snapshot string SnapshotDists []string Archive string Component string } } func NewPackage(name, srcname, version string, archs []string) ( p Package, err error) { p.Name = name p.Source = srcname p.Version = version p.Arch, p.Deb.Hash, err = p.getHash(archs) if err != nil { return } info, err := mr.GetInfo(p.Deb.Hash) if err != nil { return } p.Deb.Name = info.Result[0].Name p.Repo.Archive = info.Result[0].ArchiveName p.Repo.Snapshot = info.Result[0].FirstSeen p.Deb.URL, err = url.JoinPath(URL, "archive", p.Repo.Archive, p.Repo.Snapshot, info.Result[0].Path, p.Deb.Name) if err != nil { return } split := strings.Split(info.Result[0].Path, "/") if split[1] != "pool" || len(split) < 3 { err = fmt.Errorf("incorrect path: %s", info.Result[0].Path) return } p.Repo.Component = split[2] p.Repo.SnapshotDists, err = p.dists() if err != nil { return } return } func (p Package) getHash(archs []string) (arch, hash string, err error) { binfiles, err := mr.GetBinfiles(p.Name, p.Version) if err != nil { return } for _, res := range binfiles.Result { for _, allowedArch := range archs { if res.Architecture == allowedArch { arch = res.Architecture hash = res.Hash return } } } err = errors.New("hash not found") return } // Because the snapshot date is when the package was first introduced, // it will probably always be sid or experimental. func (p Package) GetCodename() (dist string, err error) { for _, dist = range p.Repo.SnapshotDists { var distHasPackage bool distHasPackage, err = p.isDistHasPackage(dist) if err != nil { return } if distHasPackage { return } } err = errors.New("codename not found") return } func (p Package) dists() (dists []string, err error) { query, err := url.JoinPath(URL, "archive", p.Repo.Archive, p.Repo.Snapshot, "dists/") if err != nil { return } resp, err := httpGetWithRetry(query) if err != nil { return } defer resp.Body.Close() if resp.StatusCode != 200 { err = fmt.Errorf("%d (%s)", resp.StatusCode, query) return } doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return } doc.Find("table tr").Each(func(i int, s *goquery.Selection) { html, err := s.Html() if err != nil { return } if !strings.Contains(html, "d 0; i-- { flog.Trace().Msg("wait") Limiter.Wait(context.Background()) client := http.Client{Timeout: HttpTimeout} flog.Trace().Msg("start") resp, err = client.Get(query) if err != nil { if os.IsTimeout(err) { flog.Debug().Msgf("timeout; retry (%d left)", i) continue } flog.Error().Err(err).Msg("") return } flog.Debug().Msgf("%s", resp.Status) if resp.StatusCode < 500 { break } resp.Body.Close() flog.Debug().Msgf("retry (%d left)", i) } if resp.StatusCode >= 400 { err = fmt.Errorf("%d (%s)", resp.StatusCode, query) } return } func contains(pkgs []Package, pkg Package) bool { for _, p := range pkgs { if p.Name == pkg.Name { return true } } return false } func filtered(s string, filter []string) bool { for _, f := range filter { if strings.Contains(s, f) { return true } } return false } func Packages(srcname, version, regex string, archs, filter []string) ( pkgs []Package, err error) { binpkgs, err := mr.GetBinpackages(srcname, version) if err == mr.ErrNotFound { err = nil return } if err != nil { return } r := regexp.MustCompile(regex) for _, res := range binpkgs.Result { if !r.MatchString(res.Name) || filtered(res.Name, filter) { continue } log.Trace().Msgf("matched %v", res.Name) var pkg Package pkg, err = NewPackage(res.Name, srcname, version, archs) if err != nil { return } if contains(pkgs, pkg) { log.Trace().Msgf("%v already in slice O_o", pkg.Name) continue } log.Trace().Msgf("append %v", pkg.Name) pkgs = append(pkgs, pkg) } return }