2023-05-11 12:03:18 +00:00
|
|
|
package snapshot
|
|
|
|
|
|
|
|
import (
|
2023-05-11 16:26:16 +00:00
|
|
|
"bytes"
|
|
|
|
"compress/bzip2"
|
|
|
|
"compress/gzip"
|
|
|
|
"context"
|
2023-05-11 12:03:18 +00:00
|
|
|
"errors"
|
2023-05-11 13:20:36 +00:00
|
|
|
"fmt"
|
2023-05-11 16:26:16 +00:00
|
|
|
"io"
|
|
|
|
"net/http"
|
2023-05-11 13:20:36 +00:00
|
|
|
"net/url"
|
2023-05-11 16:26:16 +00:00
|
|
|
"os"
|
2023-05-11 12:03:18 +00:00
|
|
|
"regexp"
|
2023-05-11 13:20:36 +00:00
|
|
|
"strings"
|
2023-05-11 16:26:16 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
|
|
"github.com/ulikunitz/xz"
|
|
|
|
"golang.org/x/time/rate"
|
2023-05-11 12:03:18 +00:00
|
|
|
|
|
|
|
"code.dumpstack.io/tools/out-of-tree/distro/debian/snapshot/mr"
|
|
|
|
)
|
|
|
|
|
2023-05-11 16:52:47 +00:00
|
|
|
const (
	// timeLayout is a Go reference-time layout of the form
	// YYYYMMDDTHHMMSSZ.
	// NOTE(review): presumably the format of snapshot timestamps; it is not
	// used in this part of the file, so confirm against its callers.
	timeLayout = "20060102T150405Z"

	// URL is the base address that every archive request in this file is
	// built from.
	URL = "https://snapshot.debian.org"
)
|
|
|
|
|
2023-05-11 16:26:16 +00:00
|
|
|
var Limiter = rate.NewLimiter(rate.Every(time.Second), 1)
|
|
|
|
|
|
|
|
// Retries in case of 5xx errors
|
|
|
|
var Retries = 10
|
|
|
|
|
|
|
|
var HttpTimeout = time.Second * 5
|
|
|
|
|
2023-05-11 12:03:18 +00:00
|
|
|
func SourcePackageVersions(name string) (versions []string, err error) {
|
|
|
|
pkg, err := mr.GetPackage(name)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, res := range pkg.Result {
|
|
|
|
versions = append(versions, res.Version)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Package describes one binary package resolved against the snapshot
// archive. All fields are filled in by NewPackage.
type Package struct {
	// Name is the binary package name.
	Name string
	// Source is the name of the source package it was requested for.
	Source string
	// Version is the package version.
	Version string
	// Arch is the architecture of the binary file that was selected.
	Arch string

	// Deb describes the package file itself.
	Deb struct {
		// Name is the file name reported for the hash.
		Name string
		// Hash is the file hash reported for the selected architecture.
		Hash string
		// URL is the download address of the file under URL/archive.
		URL string
	}

	// Repo describes where in the snapshot archive the file lives.
	Repo struct {
		// Snapshot is the "first seen" timestamp of the file; it is used
		// as the snapshot path element of archive URLs.
		Snapshot string
		// SnapshotDists lists the directories found under dists/ of that
		// snapshot.
		SnapshotDists []string

		// Archive is the archive name reported for the file.
		Archive string

		// Component is the path element following "pool" in the file path.
		Component string
	}
}
|
|
|
|
|
2023-05-14 11:38:41 +00:00
|
|
|
func NewPackage(name, srcname, version string, archs []string) (
|
|
|
|
p Package, err error) {
|
|
|
|
|
2023-05-11 12:03:18 +00:00
|
|
|
p.Name = name
|
|
|
|
p.Source = srcname
|
|
|
|
p.Version = version
|
|
|
|
|
2023-05-14 11:38:41 +00:00
|
|
|
p.Arch, p.Deb.Hash, err = p.getHash(archs)
|
2023-05-11 12:03:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-11 13:20:36 +00:00
|
|
|
info, err := mr.GetInfo(p.Deb.Hash)
|
2023-05-11 12:03:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-11 13:20:36 +00:00
|
|
|
p.Deb.Name = info.Result[0].Name
|
2023-05-11 12:03:18 +00:00
|
|
|
|
|
|
|
p.Repo.Archive = info.Result[0].ArchiveName
|
|
|
|
p.Repo.Snapshot = info.Result[0].FirstSeen
|
|
|
|
|
2023-05-11 13:20:36 +00:00
|
|
|
p.Deb.URL, err = url.JoinPath(URL, "archive", p.Repo.Archive,
|
|
|
|
p.Repo.Snapshot, info.Result[0].Path, p.Deb.Name)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
split := strings.Split(info.Result[0].Path, "/")
|
|
|
|
if split[1] != "pool" || len(split) < 3 {
|
|
|
|
err = fmt.Errorf("incorrect path: %s", info.Result[0].Path)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p.Repo.Component = split[2]
|
|
|
|
|
2023-05-11 16:52:47 +00:00
|
|
|
p.Repo.SnapshotDists, err = p.dists()
|
2023-05-11 16:26:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:03:18 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-14 11:38:41 +00:00
|
|
|
func (p Package) getHash(archs []string) (arch, hash string, err error) {
|
2023-05-11 12:03:18 +00:00
|
|
|
binfiles, err := mr.GetBinfiles(p.Name, p.Version)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, res := range binfiles.Result {
|
2023-05-14 11:38:41 +00:00
|
|
|
for _, allowedArch := range archs {
|
|
|
|
if res.Architecture == allowedArch {
|
|
|
|
arch = res.Architecture
|
|
|
|
hash = res.Hash
|
|
|
|
return
|
|
|
|
}
|
2023-05-11 12:03:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-11 16:26:16 +00:00
|
|
|
err = errors.New("hash not found")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-11 16:52:47 +00:00
|
|
|
// Because the snapshot date is when the package was first introduced,
|
|
|
|
// it will probably always be sid or experimental.
|
|
|
|
func (p Package) GetCodename() (dist string, err error) {
|
|
|
|
for _, dist = range p.Repo.SnapshotDists {
|
2023-05-11 16:26:16 +00:00
|
|
|
var distHasPackage bool
|
|
|
|
distHasPackage, err = p.isDistHasPackage(dist)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if distHasPackage {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
err = errors.New("codename not found")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p Package) dists() (dists []string, err error) {
|
|
|
|
query, err := url.JoinPath(URL, "archive", p.Repo.Archive,
|
|
|
|
p.Repo.Snapshot, "dists/")
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := httpGetWithRetry(query)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
if resp.StatusCode != 200 {
|
|
|
|
err = fmt.Errorf("%d (%s)", resp.StatusCode, query)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
doc.Find("table tr").Each(func(i int, s *goquery.Selection) {
|
|
|
|
html, err := s.Html()
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if !strings.Contains(html, "<td>d</td") {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
s.Find("a").Each(func(i int, s *goquery.Selection) {
|
|
|
|
if strings.Contains(s.Text(), "..") {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
dist := strings.Replace(s.Text(), "/", "", -1)
|
|
|
|
dists = append(dists, dist)
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p Package) isDistHasPackage(dist string) (yes bool, err error) {
|
|
|
|
var buf []byte
|
|
|
|
for _, ext := range []string{"xz", "gz", "bz2"} {
|
|
|
|
buf, err = p.distPackage(dist, ext)
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
yes = bytes.Contains(buf, []byte(p.Deb.Name))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p Package) distPackage(dist string, ext string) (buf []byte, err error) {
|
|
|
|
query, err := url.JoinPath(URL, "archive", p.Repo.Archive,
|
|
|
|
p.Repo.Snapshot, "dists", dist,
|
|
|
|
fmt.Sprintf("main/binary-%s/Packages.%s", p.Arch, ext),
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := httpGetWithRetry(query)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
var reader io.Reader
|
|
|
|
switch ext {
|
|
|
|
case "xz":
|
|
|
|
reader, err = xz.NewReader(resp.Body)
|
|
|
|
case "gz":
|
|
|
|
reader, err = gzip.NewReader(resp.Body)
|
|
|
|
case "bz2":
|
|
|
|
reader = bzip2.NewReader(resp.Body)
|
|
|
|
default:
|
|
|
|
err = fmt.Errorf("%s not supported", ext)
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
buf, err = io.ReadAll(reader)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func httpGetWithRetry(query string) (resp *http.Response, err error) {
|
|
|
|
flog := log.With().Str("url", query).Logger()
|
|
|
|
for i := Retries; i > 0; i-- {
|
|
|
|
flog.Trace().Msg("wait")
|
|
|
|
Limiter.Wait(context.Background())
|
|
|
|
|
|
|
|
client := http.Client{Timeout: HttpTimeout}
|
|
|
|
|
|
|
|
flog.Trace().Msg("start")
|
|
|
|
resp, err = client.Get(query)
|
|
|
|
if err != nil {
|
|
|
|
if os.IsTimeout(err) {
|
|
|
|
flog.Debug().Msgf("timeout; retry (%d left)", i)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
flog.Error().Err(err).Msg("")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
flog.Debug().Msgf("%s", resp.Status)
|
|
|
|
|
|
|
|
if resp.StatusCode < 500 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
resp.Body.Close()
|
|
|
|
flog.Debug().Msgf("retry (%d left)", i)
|
|
|
|
}
|
|
|
|
|
|
|
|
if resp.StatusCode >= 400 {
|
|
|
|
err = fmt.Errorf("%d (%s)", resp.StatusCode, query)
|
|
|
|
}
|
2023-05-11 12:03:18 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-11 21:20:00 +00:00
|
|
|
func contains(pkgs []Package, pkg Package) bool {
|
|
|
|
for _, p := range pkgs {
|
|
|
|
if p.Name == pkg.Name {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-05-11 22:17:31 +00:00
|
|
|
// filtered reports whether s contains at least one of the substrings in
// filter. An empty or nil filter never matches.
func filtered(s string, filter []string) bool {
	for _, needle := range filter {
		if !strings.Contains(s, needle) {
			continue
		}
		return true
	}
	return false
}
|
|
|
|
|
2023-05-14 11:38:41 +00:00
|
|
|
func Packages(srcname, version, regex string, archs, filter []string) (
|
|
|
|
pkgs []Package, err error) {
|
2023-05-11 22:17:31 +00:00
|
|
|
|
2023-05-11 12:03:18 +00:00
|
|
|
binpkgs, err := mr.GetBinpackages(srcname, version)
|
2023-05-11 22:06:31 +00:00
|
|
|
if err == mr.ErrNotFound {
|
|
|
|
err = nil
|
|
|
|
return
|
|
|
|
}
|
2023-05-11 12:03:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
r := regexp.MustCompile(regex)
|
|
|
|
|
|
|
|
for _, res := range binpkgs.Result {
|
2023-05-11 22:17:31 +00:00
|
|
|
if !r.MatchString(res.Name) || filtered(res.Name, filter) {
|
2023-05-11 12:03:18 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-05-11 20:24:54 +00:00
|
|
|
log.Trace().Msgf("matched %v", res.Name)
|
|
|
|
|
2023-05-11 12:03:18 +00:00
|
|
|
var pkg Package
|
2023-05-14 11:38:41 +00:00
|
|
|
pkg, err = NewPackage(res.Name, srcname, version, archs)
|
2023-05-11 12:03:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-11 21:20:00 +00:00
|
|
|
if contains(pkgs, pkg) {
|
|
|
|
log.Trace().Msgf("%v already in slice O_o", pkg.Name)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-05-11 20:24:54 +00:00
|
|
|
log.Trace().Msgf("append %v", pkg.Name)
|
2023-05-11 12:03:18 +00:00
|
|
|
pkgs = append(pkgs, pkg)
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|