2024-01-22 10:36:06 +00:00
|
|
|
/*
|
2024-05-05 10:32:08 +00:00
|
|
|
* ALR - Any Linux Repository
|
|
|
|
* Copyright (C) 2024 Евгений Храмов
|
2024-01-22 10:36:06 +00:00
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
// Package dl contains abstractions for downloadingfiles and directories
|
|
|
|
// from various sources.
|
|
|
|
package dl
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"crypto/md5"
|
|
|
|
"crypto/sha1"
|
|
|
|
"crypto/sha256"
|
|
|
|
"crypto/sha512"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"hash"
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/purell"
|
|
|
|
"github.com/vmihailenco/msgpack/v5"
|
|
|
|
"golang.org/x/crypto/blake2b"
|
|
|
|
"golang.org/x/crypto/blake2s"
|
|
|
|
"golang.org/x/exp/slices"
|
2024-05-05 10:32:08 +00:00
|
|
|
"plemya-x.ru/alr/internal/dlcache"
|
|
|
|
"plemya-x.ru/alr/pkg/loggerctx"
|
2024-01-22 10:36:06 +00:00
|
|
|
)
|
|
|
|
|
2024-05-05 10:32:08 +00:00
|
|
|
const manifestFileName = ".alr_cache_manifest"
|
2024-01-22 10:36:06 +00:00
|
|
|
|
|
|
|
// ErrChecksumMismatch occurs when the checksum of a downloaded file
|
|
|
|
// does not match the expected checksum provided in the Options struct.
|
|
|
|
var (
|
|
|
|
ErrChecksumMismatch = errors.New("dl: checksums did not match")
|
|
|
|
ErrNoSuchHashAlgo = errors.New("dl: invalid hashing algorithm")
|
|
|
|
)
|
|
|
|
|
|
|
|
// Downloaders contains all the downloaders in the order in which
|
|
|
|
// they should be checked
|
|
|
|
var Downloaders = []Downloader{
|
|
|
|
GitDownloader{},
|
|
|
|
TorrentDownloader{},
|
|
|
|
FileDownloader{},
|
|
|
|
}
|
|
|
|
|
|
|
|
// Type represents the type of download (file or directory)
|
|
|
|
type Type uint8
|
|
|
|
|
|
|
|
const (
|
|
|
|
TypeFile Type = iota
|
|
|
|
TypeDir
|
|
|
|
)
|
|
|
|
|
|
|
|
func (t Type) String() string {
|
|
|
|
switch t {
|
|
|
|
case TypeFile:
|
|
|
|
return "file"
|
|
|
|
case TypeDir:
|
|
|
|
return "dir"
|
|
|
|
}
|
|
|
|
return "<unknown>"
|
|
|
|
}
|
|
|
|
|
|
|
|
// Options contains the options for downloading
|
|
|
|
// files and directories
|
|
|
|
type Options struct {
|
|
|
|
Hash []byte
|
|
|
|
HashAlgorithm string
|
|
|
|
Name string
|
|
|
|
URL string
|
|
|
|
Destination string
|
|
|
|
CacheDisabled bool
|
|
|
|
PostprocDisabled bool
|
|
|
|
Progress io.Writer
|
|
|
|
LocalDir string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (opts Options) NewHash() (hash.Hash, error) {
|
|
|
|
switch opts.HashAlgorithm {
|
|
|
|
case "", "sha256":
|
|
|
|
return sha256.New(), nil
|
|
|
|
case "sha224":
|
|
|
|
return sha256.New224(), nil
|
|
|
|
case "sha512":
|
|
|
|
return sha512.New(), nil
|
|
|
|
case "sha384":
|
|
|
|
return sha512.New384(), nil
|
|
|
|
case "sha1":
|
|
|
|
return sha1.New(), nil
|
|
|
|
case "md5":
|
|
|
|
return md5.New(), nil
|
|
|
|
case "blake2s-128":
|
|
|
|
return blake2s.New256(nil)
|
|
|
|
case "blake2s-256":
|
|
|
|
return blake2s.New256(nil)
|
|
|
|
case "blake2b-256":
|
|
|
|
return blake2b.New(32, nil)
|
|
|
|
case "blake2b-512":
|
|
|
|
return blake2b.New(64, nil)
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("%w: %s", ErrNoSuchHashAlgo, opts.HashAlgorithm)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Manifest holds information about the type and name
|
|
|
|
// of a downloaded file or directory. It is stored inside
|
|
|
|
// each cache directory for later use.
|
|
|
|
type Manifest struct {
|
|
|
|
Type Type
|
|
|
|
Name string
|
|
|
|
}
|
|
|
|
|
|
|
|
type Downloader interface {
|
|
|
|
// Name returns the name of the downloader
|
|
|
|
Name() string
|
|
|
|
// MatchURL checks if the given URL matches
|
|
|
|
// the downloader.
|
|
|
|
MatchURL(string) bool
|
|
|
|
// Download downloads the object at the URL
|
|
|
|
// provided in the options, to the destination
|
|
|
|
// given in the options. It returns a type,
|
|
|
|
// a name for the downloaded object (this may be empty),
|
|
|
|
// and an error.
|
|
|
|
Download(Options) (Type, string, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// UpdatingDownloader extends the Downloader interface
|
|
|
|
// with an Update method for protocols such as git, which
|
|
|
|
// allow for incremental updates without changing the URL.
|
|
|
|
type UpdatingDownloader interface {
|
|
|
|
Downloader
|
|
|
|
// Update checks for and performs any
|
|
|
|
// available updates for the object
|
|
|
|
// described in the options. It returns
|
|
|
|
// true if an update was performed, or
|
|
|
|
// false if no update was required.
|
|
|
|
Update(Options) (bool, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Download downloads a file or directory using the specified options.
|
|
|
|
// It first gets the appropriate downloader for the URL, then checks
|
|
|
|
// if caching is enabled. If caching is enabled, it attempts to get
|
|
|
|
// the cache directory for the URL and update it if necessary.
|
|
|
|
// If the source is found in the cache, it links it to the destination
|
|
|
|
// using hard links. If the source is not found in the cache,
|
|
|
|
// it downloads the source to a new cache directory and links it
|
|
|
|
// to the destination.
|
|
|
|
func Download(ctx context.Context, opts Options) (err error) {
|
|
|
|
log := loggerctx.From(ctx)
|
|
|
|
normalized, err := normalizeURL(opts.URL)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
opts.URL = normalized
|
|
|
|
|
|
|
|
d := getDownloader(opts.URL)
|
|
|
|
|
|
|
|
if opts.CacheDisabled {
|
|
|
|
_, _, err = d.Download(opts)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
var t Type
|
|
|
|
cacheDir, ok := dlcache.Get(ctx, opts.URL)
|
|
|
|
if ok {
|
|
|
|
var updated bool
|
|
|
|
if d, ok := d.(UpdatingDownloader); ok {
|
|
|
|
log.Info("Source can be updated, updating if required").Str("source", opts.Name).Str("downloader", d.Name()).Send()
|
|
|
|
|
|
|
|
updated, err = d.Update(Options{
|
|
|
|
Hash: opts.Hash,
|
|
|
|
HashAlgorithm: opts.HashAlgorithm,
|
|
|
|
Name: opts.Name,
|
|
|
|
URL: opts.URL,
|
|
|
|
Destination: cacheDir,
|
|
|
|
Progress: opts.Progress,
|
|
|
|
LocalDir: opts.LocalDir,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
m, err := getManifest(cacheDir)
|
|
|
|
if err == nil {
|
|
|
|
t = m.Type
|
|
|
|
|
|
|
|
dest := filepath.Join(opts.Destination, m.Name)
|
|
|
|
ok, err := handleCache(cacheDir, dest, m.Name, t)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if ok && !updated {
|
|
|
|
log.Info("Source found in cache and linked to destination").Str("source", opts.Name).Stringer("type", t).Send()
|
|
|
|
return nil
|
|
|
|
} else if ok {
|
|
|
|
log.Info("Source updated and linked to destination").Str("source", opts.Name).Stringer("type", t).Send()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// If we cannot read the manifest,
|
|
|
|
// this cache entry is invalid and
|
|
|
|
// the source must be re-downloaded.
|
|
|
|
err = os.RemoveAll(cacheDir)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info("Downloading source").Str("source", opts.Name).Str("downloader", d.Name()).Send()
|
|
|
|
|
|
|
|
cacheDir, err = dlcache.New(ctx, opts.URL)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
t, name, err := d.Download(Options{
|
|
|
|
Hash: opts.Hash,
|
|
|
|
HashAlgorithm: opts.HashAlgorithm,
|
|
|
|
Name: opts.Name,
|
|
|
|
URL: opts.URL,
|
|
|
|
Destination: cacheDir,
|
|
|
|
Progress: opts.Progress,
|
|
|
|
LocalDir: opts.LocalDir,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = writeManifest(cacheDir, Manifest{t, name})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
dest := filepath.Join(opts.Destination, name)
|
|
|
|
_, err = handleCache(cacheDir, dest, name, t)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// writeManifest writes the manifest to the specified cache directory.
|
|
|
|
func writeManifest(cacheDir string, m Manifest) error {
|
|
|
|
fl, err := os.Create(filepath.Join(cacheDir, manifestFileName))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer fl.Close()
|
|
|
|
return msgpack.NewEncoder(fl).Encode(m)
|
|
|
|
}
|
|
|
|
|
|
|
|
// getManifest reads the manifest from the specified cache directory.
|
|
|
|
func getManifest(cacheDir string) (m Manifest, err error) {
|
|
|
|
fl, err := os.Open(filepath.Join(cacheDir, manifestFileName))
|
|
|
|
if err != nil {
|
|
|
|
return Manifest{}, err
|
|
|
|
}
|
|
|
|
defer fl.Close()
|
|
|
|
|
|
|
|
err = msgpack.NewDecoder(fl).Decode(&m)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleCache links the cache directory or a file within it to the destination
|
|
|
|
func handleCache(cacheDir, dest, name string, t Type) (bool, error) {
|
|
|
|
switch t {
|
|
|
|
case TypeFile:
|
|
|
|
cd, err := os.Open(cacheDir)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
names, err := cd.Readdirnames(0)
|
|
|
|
if err == io.EOF {
|
|
|
|
break
|
|
|
|
} else if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cd.Close()
|
|
|
|
|
|
|
|
if slices.Contains(names, name) {
|
|
|
|
err = os.Link(filepath.Join(cacheDir, name), dest)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
case TypeDir:
|
|
|
|
err := linkDir(cacheDir, dest)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// linkDir recursively walks through a directory, creating
|
|
|
|
// hard links for each file from the src directory to the
|
|
|
|
// dest directory. If it encounters a directory, it will
|
|
|
|
// create a directory with the same name and permissions
|
|
|
|
// in the dest directory, because hard links cannot be
|
|
|
|
// created for directories.
|
|
|
|
func linkDir(src, dest string) error {
|
|
|
|
return filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if info.Name() == manifestFileName {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
rel, err := filepath.Rel(src, path)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
newPath := filepath.Join(dest, rel)
|
|
|
|
if info.IsDir() {
|
|
|
|
return os.MkdirAll(newPath, info.Mode())
|
|
|
|
}
|
|
|
|
|
|
|
|
return os.Link(path, newPath)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func getDownloader(u string) Downloader {
|
|
|
|
for _, d := range Downloaders {
|
|
|
|
if d.MatchURL(u) {
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// normalizeURL normalizes a URL string, so that insignificant
|
|
|
|
// differences don't change the hash.
|
|
|
|
func normalizeURL(u string) (string, error) {
|
|
|
|
const normalizationFlags = purell.FlagRemoveTrailingSlash |
|
|
|
|
purell.FlagRemoveDefaultPort |
|
|
|
|
purell.FlagLowercaseHost |
|
|
|
|
purell.FlagLowercaseScheme |
|
|
|
|
purell.FlagRemoveDuplicateSlashes |
|
|
|
|
purell.FlagRemoveFragment |
|
|
|
|
purell.FlagRemoveUnnecessaryHostDots |
|
|
|
|
purell.FlagSortQuery |
|
|
|
|
purell.FlagDecodeHexHost |
|
|
|
|
purell.FlagDecodeOctalHost |
|
|
|
|
purell.FlagDecodeUnnecessaryEscapes |
|
|
|
|
purell.FlagRemoveEmptyPortSeparator
|
|
|
|
|
|
|
|
u, err := purell.NormalizeURLString(u, normalizationFlags)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fix magnet URLs after normalization
|
|
|
|
u = strings.Replace(u, "magnet://", "magnet:", 1)
|
|
|
|
return u, nil
|
|
|
|
}
|