1
0
out-of-tree/distro/debian/snapshot/snapshot.go

340 lines
6.2 KiB
Go

package snapshot
import (
"bytes"
"compress/bzip2"
"compress/gzip"
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/rs/zerolog/log"
"github.com/ulikunitz/xz"
"golang.org/x/time/rate"
"code.dumpstack.io/tools/out-of-tree/distro/debian/snapshot/mr"
)
const timeLayout = "20060102T150405Z"
const URL = "https://snapshot.debian.org"
var Limiter = rate.NewLimiter(rate.Every(time.Second), 1)
// Retries in case of 5xx errors
var Retries = 10
var HttpTimeout = time.Second * 5
func SourcePackageVersions(name string) (versions []string, err error) {
pkg, err := mr.GetPackage(name)
if err != nil {
return
}
for _, res := range pkg.Result {
versions = append(versions, res.Version)
}
return
}
type Package struct {
Name string
Source string
Version string
Arch string
Deb struct {
Name string
Hash string
URL string
}
Repo struct {
Snapshot string
SnapshotDists []string
Archive string
Component string
}
}
func NewPackage(name, srcname, version string, archs []string) (
p Package, err error) {
p.Name = name
p.Source = srcname
p.Version = version
p.Arch, p.Deb.Hash, err = p.getHash(archs)
if err != nil {
return
}
info, err := mr.GetInfo(p.Deb.Hash)
if err != nil {
return
}
p.Deb.Name = info.Result[0].Name
p.Repo.Archive = info.Result[0].ArchiveName
p.Repo.Snapshot = info.Result[0].FirstSeen
p.Deb.URL, err = url.JoinPath(URL, "archive", p.Repo.Archive,
p.Repo.Snapshot, info.Result[0].Path, p.Deb.Name)
if err != nil {
return
}
split := strings.Split(info.Result[0].Path, "/")
if split[1] != "pool" || len(split) < 3 {
err = fmt.Errorf("incorrect path: %s", info.Result[0].Path)
return
}
p.Repo.Component = split[2]
p.Repo.SnapshotDists, err = p.dists()
if err != nil {
return
}
return
}
func (p Package) getHash(archs []string) (arch, hash string, err error) {
binfiles, err := mr.GetBinfiles(p.Name, p.Version)
if err != nil {
return
}
for _, res := range binfiles.Result {
for _, allowedArch := range archs {
if res.Architecture == allowedArch {
arch = res.Architecture
hash = res.Hash
return
}
}
}
err = errors.New("hash not found")
return
}
// Because the snapshot date is when the package was first introduced,
// it will probably always be sid or experimental.
func (p Package) GetCodename() (dist string, err error) {
for _, dist = range p.Repo.SnapshotDists {
var distHasPackage bool
distHasPackage, err = p.isDistHasPackage(dist)
if err != nil {
return
}
if distHasPackage {
return
}
}
err = errors.New("codename not found")
return
}
func (p Package) dists() (dists []string, err error) {
query, err := url.JoinPath(URL, "archive", p.Repo.Archive,
p.Repo.Snapshot, "dists/")
if err != nil {
return
}
resp, err := httpGetWithRetry(query)
if err != nil {
return
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
err = fmt.Errorf("%d (%s)", resp.StatusCode, query)
return
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return
}
doc.Find("table tr").Each(func(i int, s *goquery.Selection) {
html, err := s.Html()
if err != nil {
return
}
if !strings.Contains(html, "<td>d</td") {
return
}
s.Find("a").Each(func(i int, s *goquery.Selection) {
if strings.Contains(s.Text(), "..") {
return
}
dist := strings.Replace(s.Text(), "/", "", -1)
dists = append(dists, dist)
})
})
return
}
func (p Package) isDistHasPackage(dist string) (yes bool, err error) {
var buf []byte
for _, ext := range []string{"xz", "gz", "bz2"} {
buf, err = p.distPackage(dist, ext)
if err == nil {
break
}
}
if err != nil {
return
}
yes = bytes.Contains(buf, []byte(p.Deb.Name))
return
}
func (p Package) distPackage(dist string, ext string) (buf []byte, err error) {
query, err := url.JoinPath(URL, "archive", p.Repo.Archive,
p.Repo.Snapshot, "dists", dist,
fmt.Sprintf("main/binary-%s/Packages.%s", p.Arch, ext),
)
if err != nil {
return
}
resp, err := httpGetWithRetry(query)
if err != nil {
return
}
defer resp.Body.Close()
var reader io.Reader
switch ext {
case "xz":
reader, err = xz.NewReader(resp.Body)
case "gz":
reader, err = gzip.NewReader(resp.Body)
case "bz2":
reader = bzip2.NewReader(resp.Body)
default:
err = fmt.Errorf("%s not supported", ext)
}
if err != nil {
return
}
buf, err = io.ReadAll(reader)
return
}
func httpGetWithRetry(query string) (resp *http.Response, err error) {
flog := log.With().Str("url", query).Logger()
for i := Retries; i > 0; i-- {
flog.Trace().Msg("wait")
Limiter.Wait(context.Background())
client := http.Client{Timeout: HttpTimeout}
flog.Trace().Msg("start")
resp, err = client.Get(query)
if err != nil {
if os.IsTimeout(err) {
flog.Debug().Msgf("timeout; retry (%d left)", i)
continue
}
flog.Error().Err(err).Msg("")
return
}
flog.Debug().Msgf("%s", resp.Status)
if resp.StatusCode < 500 {
break
}
resp.Body.Close()
flog.Debug().Msgf("retry (%d left)", i)
}
if resp.StatusCode >= 400 {
err = fmt.Errorf("%d (%s)", resp.StatusCode, query)
}
return
}
func contains(pkgs []Package, pkg Package) bool {
for _, p := range pkgs {
if p.Name == pkg.Name {
return true
}
}
return false
}
func filtered(s string, filter []string) bool {
for _, f := range filter {
if strings.Contains(s, f) {
return true
}
}
return false
}
func Packages(srcname, version, regex string, archs, filter []string) (
pkgs []Package, err error) {
binpkgs, err := mr.GetBinpackages(srcname, version)
if err == mr.ErrNotFound {
err = nil
return
}
if err != nil {
return
}
r := regexp.MustCompile(regex)
for _, res := range binpkgs.Result {
if res.Version != version {
continue
}
if !r.MatchString(res.Name) || filtered(res.Name, filter) {
continue
}
log.Trace().Msgf("matched %v", res.Name)
var pkg Package
pkg, err = NewPackage(res.Name, srcname, version, archs)
if err != nil {
return
}
if contains(pkgs, pkg) {
log.Trace().Msgf("%v already in slice O_o", pkg.Name)
continue
}
log.Trace().Msgf("append %v", pkg.Name)
pkgs = append(pkgs, pkg)
}
return
}