diff options
author | Chantry Xavier <shiningxc@gmail.com> | 2008-02-16 10:47:22 -0600 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2008-04-26 11:36:01 -0500 |
commit | 701a03dcdb113e92b4f8de52a7a427dfdfc3dd27 (patch) | |
tree | 670bae62f1dce6ea8c22a7afa9f5ba79aa4f1573 /lib | |
parent | 30bdf94c2b444ff475a32e7b0c569e8c3cf05797 (diff) |
Completely rework delta algorithm
Using the graph structures that Nagy set up for dependency sorting, we now
do a similar process for deltas. Load up all of the deltas into a graph
object on which we can then apply Dijkstra's algorithm, using the new weight
field of graph struct.
We initialize the nodes weight using the base files that we can use in our
filecache (both filename and md5sum must match). The algorithm then picks
the best path among those that can be resolved.
Note that this algorithm has a few advantages over the old one:
1. It is completely file agnostic. These delta chains do not have to consist
of package files- this could be adopted to do delta-fied DBs.
2. It does not use the local_db anymore, or even care if a package or file
is currently installed. Instead, it only looks in the filecache for files
and packages that match delta chain entries.
Original-work-by: Dan McGee <dan@archlinux.org>
Signed-off-by: Chantry Xavier <shiningxc@gmail.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libalpm/alpm.h | 2 | ||||
-rw-r--r-- | lib/libalpm/delta.c | 232 | ||||
-rw-r--r-- | lib/libalpm/delta.h | 8 | ||||
-rw-r--r-- | lib/libalpm/graph.h | 1 | ||||
-rw-r--r-- | lib/libalpm/package.c | 1 | ||||
-rw-r--r-- | lib/libalpm/package.h | 2 | ||||
-rw-r--r-- | lib/libalpm/sync.c | 175 | ||||
-rw-r--r-- | lib/libalpm/util.c | 4 |
8 files changed, 217 insertions, 208 deletions
diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index e5445d0d..288426d6 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -222,7 +222,7 @@ size_t alpm_pkg_changelog_read(void *ptr, size_t size, int alpm_pkg_changelog_close(const pmpkg_t *pkg, void *fp); unsigned short alpm_pkg_has_scriptlet(pmpkg_t *pkg); -unsigned long alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local); +unsigned long alpm_pkg_download_size(pmpkg_t *newpkg); /* * Deltas diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c index 12fb9bd7..fdb4d99b 100644 --- a/lib/libalpm/delta.c +++ b/lib/libalpm/delta.c @@ -21,13 +21,14 @@ #include <stdlib.h> #include <string.h> +#include <limits.h> /* libalpm */ #include "delta.h" +#include "alpm_list.h" #include "util.h" #include "log.h" -#include "alpm_list.h" -#include "alpm.h" +#include "graph.h" /** \addtogroup alpm_deltas Delta Functions * @brief Functions to manipulate libalpm deltas @@ -78,129 +79,170 @@ unsigned long SYMEXPORT alpm_delta_get_size(pmdelta_t *delta) /** @} */ -/** Calculates the combined size of a list of delta files. - * @param deltas the list of pmdelta_t * objects - * @return the combined size - */ -unsigned long _alpm_delta_path_size(alpm_list_t *deltas) +static alpm_list_t *delta_graph_init(alpm_list_t *deltas) { - unsigned long sum = 0; - alpm_list_t *dlts = deltas; - - while(dlts) { - pmdelta_t *d = alpm_list_getdata(dlts); - sum += d->delta_size; + alpm_list_t *i, *j; + alpm_list_t *vertices = NULL; + /* create the vertices */ + for(i = deltas; i; i = i->next) { + char *fpath, *md5sum; + pmgraph_t *v = _alpm_graph_new(); + pmdelta_t *vdelta = i->data; + vdelta->download_size = vdelta->delta_size; + v->weight = ULONG_MAX; + + /* determine whether the delta file already exists */ + fpath = _alpm_filecache_find(vdelta->delta); + md5sum = alpm_get_md5sum(fpath); + if(fpath && md5sum && strcmp(md5sum, vdelta->delta_md5) == 0) { + vdelta->download_size = 0; + } + FREE(fpath); + FREE(md5sum); + + /* determine whether a base 'from' file exists */ + fpath = _alpm_filecache_find(vdelta->from); + md5sum = alpm_get_md5sum(fpath); + if(fpath && md5sum && strcmp(md5sum, vdelta->from_md5) == 0) { + v->weight = vdelta->download_size; + } + FREE(fpath); + FREE(md5sum); - dlts = alpm_list_next(dlts); + v->data = vdelta; + vertices = alpm_list_add(vertices, v); } - return(sum); + /* compute the edges */ + for(i = vertices; i; i = i->next) { + pmgraph_t *v_i = i->data; + pmdelta_t *d_i = v_i->data; + /* loop a second time so we make all possible comparisons */ + for(j = vertices; j; j = j->next) { + pmgraph_t *v_j = j->data; + pmdelta_t *d_j = v_j->data; + /* We want to create a delta tree like the following: + * 1_to_2 + * | + * 1_to_3 2_to_3 + * \ / + * 3_to_4 + * If J 'from' is equal to I 'to', then J is a child of I. + * */ + if(strcmp(d_j->from, d_i->to) == 0 + && strcmp(d_j->from_md5, d_i->to_md5) == 0) { + v_i->children = alpm_list_add(v_i->children, v_j); + } + } + v_i->childptr = v_i->children; + } + return(vertices); } -/** Calculates the combined size of a list of delta files that are not - * in the cache. - * @param deltas the list of pmdelta_t * objects - * @return the combined size - */ -unsigned long _alpm_delta_path_size_uncached(alpm_list_t *deltas) -{ - unsigned long sum = 0; - alpm_list_t *dlts = deltas; - - while(dlts) { - pmdelta_t *d = alpm_list_getdata(dlts); - char *fname = _alpm_filecache_find(d->delta); +static unsigned long delta_vert(alpm_list_t *vertices, + const char *to, const char *to_md5, alpm_list_t **path) { + alpm_list_t *i; + pmgraph_t *v; + while(1) { + v = NULL; + /* find the smallest vertice not visited yet */ + for(i = vertices; i; i = i->next) { + pmgraph_t *v_i = i->data; + + if(v_i->state == -1) { + continue; + } - if(!fname) { - sum += d->delta_size; + if(v == NULL || v_i->weight < v->weight) { + v = v_i; + } + } + if(v == NULL || v->weight == ULONG_MAX) { + break; } - FREE(fname); - - dlts = alpm_list_next(dlts); - } + v->state = -1; - return(sum); -} + v->childptr = v->children; + while(v->childptr) { + pmgraph_t *v_c = v->childptr->data; + pmdelta_t *d_c = v_c->data; + if(v_c->weight > v->weight + d_c->download_size) { + v_c->weight = v->weight + d_c->download_size; + v_c->parent = v; + } -/** Calculates the shortest path from one version to another. - * The shortest path is defined as the path with the smallest combined - * size, not the length of the path. - * The algorithm is based on Dijkstra's shortest path algorithm. - * @param deltas the list of pmdelta_t * objects that a package has - * @param from the version to start from - * @param to the version to end at - * @param path the current path - * @return the list of pmdelta_t * objects that has the smallest size. - * NULL (the empty list) is returned if there is no path between the - * versions. - */ -static alpm_list_t *shortest_delta_path(alpm_list_t *deltas, - const char *from, const char *to, alpm_list_t *path) -{ - alpm_list_t *d; - alpm_list_t *shortest = NULL; + v->childptr = (v->childptr)->next; - /* Found the 'to' version, this is a good path so return it. */ - if(strcmp(from, to) == 0) { - return(path); + } } - for(d = deltas; d; d = alpm_list_next(d)) { - pmdelta_t *v = alpm_list_getdata(d); + v = NULL; + unsigned long bestsize = 0; - /* If this vertex has already been visited in the path, go to the - * next vertex. */ - if(alpm_list_find_ptr(path, v)) { - continue; - } + for(i = vertices; i; i = i->next) { + pmgraph_t *v_i = i->data; + pmdelta_t *d_i = v_i->data; - /* Once we find a vertex that starts at the 'from' version, - * recursively find the shortest path using the 'to' version of this - * current vertex as the 'from' version in the function call. */ - if(strcmp(v->from, from) == 0) { - alpm_list_t *newpath = alpm_list_copy(path); - newpath = alpm_list_add(newpath, v); - newpath = shortest_delta_path(deltas, v->to, to, newpath); - - if(newpath != NULL) { - /* The path returned works, now use it unless there is already a - * shorter path found. */ - if(shortest == NULL) { - shortest = newpath; - } else if(_alpm_delta_path_size(shortest) > _alpm_delta_path_size(newpath)) { - alpm_list_free(shortest); - shortest = newpath; - } else { - alpm_list_free(newpath); - } + if(strcmp(d_i->to, to) == 0 + || strcmp(d_i->to_md5, to_md5) == 0) { + if(v == NULL || v_i->weight < v->weight) { + v = v_i; + bestsize = v->weight; } } } - alpm_list_free(path); + alpm_list_t *rpath = NULL; + while(v != NULL) { + pmdelta_t *vdelta = v->data; + rpath = alpm_list_add(rpath, vdelta); + v = v->parent; + } + *path = alpm_list_reverse(rpath); + alpm_list_free(rpath); - return(shortest); + return(bestsize); } /** Calculates the shortest path from one version to another. * The shortest path is defined as the path with the smallest combined * size, not the length of the path. - * @param deltas the list of pmdelta_t * objects that a package has - * @param from the version to start from - * @param to the version to end at - * @return the list of pmdelta_t * objects that has the smallest size. - * NULL (the empty list) is returned if there is no path between the - * versions. + * @param deltas the list of pmdelta_t * objects that a file has + * @param to the file to start the search at + * @param to_md5 the md5sum of the above named file + * @param path the pointer to a list location where pmdelta_t * objects that + * have the smallest size are placed. NULL is set if there is no path + * possible with the files available. + * @return the size of the path stored, or ULONG_MAX if path is unfindable */ -alpm_list_t *_alpm_shortest_delta_path(alpm_list_t *deltas, const char *from, - const char *to) +unsigned long _alpm_shortest_delta_path(alpm_list_t *deltas, + const char *to, const char *to_md5, alpm_list_t **path) { - alpm_list_t *path = NULL; + alpm_list_t *bestpath = NULL; + alpm_list_t *vertices; + unsigned long bestsize = ULONG_MAX; + + ALPM_LOG_FUNC; + + if(deltas == NULL) { + *path = NULL; + return(bestsize); + } + + _alpm_log(PM_LOG_DEBUG, "started delta shortest-path search\n"); + + vertices = delta_graph_init(deltas); + + bestsize = delta_vert(vertices, to, to_md5, &bestpath); + + _alpm_log(PM_LOG_DEBUG, "delta shortest-path search complete\n"); - path = shortest_delta_path(deltas, from, to, path); + alpm_list_free_inner(vertices, _alpm_graph_free); + alpm_list_free(vertices); - return(path); + *path = bestpath; + return(bestsize); } /** Parses the string representation of a pmdelta_t object. diff --git a/lib/libalpm/delta.h b/lib/libalpm/delta.h index a2ac5f05..33d47e1e 100644 --- a/lib/libalpm/delta.h +++ b/lib/libalpm/delta.h @@ -36,14 +36,14 @@ struct __pmdelta_t { char *delta_md5; /** filesize of the delta file */ unsigned long delta_size; + /** download filesize of the delta file */ + unsigned long download_size; }; -unsigned long _alpm_delta_path_size(alpm_list_t *deltas); -unsigned long _alpm_delta_path_size_uncached(alpm_list_t *deltas); pmdelta_t *_alpm_delta_parse(char *line); void _alpm_delta_free(pmdelta_t *delta); -alpm_list_t *_alpm_shortest_delta_path(alpm_list_t *deltas, - const char *from, const char *to); +unsigned long _alpm_shortest_delta_path(alpm_list_t *deltas, + const char *to, const char *to_md5, alpm_list_t **path); #endif /* _ALPM_DELTA_H */ diff --git a/lib/libalpm/graph.h b/lib/libalpm/graph.h index e3e40023..3078e25f 100644 --- a/lib/libalpm/graph.h +++ b/lib/libalpm/graph.h @@ -24,6 +24,7 @@ struct __pmgraph_t { char state; /* 0: untouched, -1: entered, other: leaving time */ void *data; + unsigned long int weight; /* weight of the node */ struct __pmgraph_t *parent; /* where did we come from? */ alpm_list_t *children; alpm_list_t *childptr; /* points to a child in children list */ diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index 3277dd09..4baa1588 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -834,6 +834,7 @@ void _alpm_pkg_free(pmpkg_t *pkg) FREELIST(pkg->provides); alpm_list_free_inner(pkg->deltas, (alpm_list_fn_free)_alpm_delta_free); alpm_list_free(pkg->deltas); + alpm_list_free(pkg->delta_path); if(pkg->origin == PKG_FROM_FILE) { FREE(pkg->origin_data.file); diff --git a/lib/libalpm/package.h b/lib/libalpm/package.h index f3de05d6..0ba8aac4 100644 --- a/lib/libalpm/package.h +++ b/lib/libalpm/package.h @@ -70,6 +70,8 @@ struct __pmpkg_t { char *file; } origin_data; pmdbinfrq_t infolevel; + unsigned long download_size; + alpm_list_t *delta_path; }; int _alpm_versioncmp(const char *a, const char *b); diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index 5146a994..3a4169e8 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -380,6 +380,47 @@ static int syncpkg_cmp(const void *s1, const void *s2) return(strcmp(alpm_pkg_get_name(p1), alpm_pkg_get_name(p2))); } +/** Compute the size of the files that will be downloaded to install a + * package. + * @param newpkg the new package to upgrade to + */ +static void compute_download_size(pmpkg_t *newpkg) +{ + char *fpath = _alpm_filecache_find(alpm_pkg_get_filename(newpkg)); + unsigned long size = 0; + + if(fpath) { + FREE(fpath); + size = 0; + } else if(handle->usedelta) { + unsigned long dltsize; + unsigned long pkgsize = alpm_pkg_get_size(newpkg); + + dltsize = _alpm_shortest_delta_path( + alpm_pkg_get_deltas(newpkg), + alpm_pkg_get_filename(newpkg), + alpm_pkg_get_md5sum(newpkg), + &newpkg->delta_path); + + if(newpkg->delta_path && (dltsize < pkgsize * MAX_DELTA_RATIO)) { + _alpm_log(PM_LOG_DEBUG, "using delta size\n"); + size = dltsize; + } else { + _alpm_log(PM_LOG_DEBUG, "using package size\n"); + size = alpm_pkg_get_size(newpkg); + alpm_list_free(newpkg->delta_path); + newpkg->delta_path = NULL; + } + } else { + size = alpm_pkg_get_size(newpkg); + } + + _alpm_log(PM_LOG_DEBUG, "returning size %ld for pkg %s\n", size, + alpm_pkg_get_name(newpkg)); + + newpkg->download_size = size; +} + int _alpm_sync_prepare(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t *dbs_sync, alpm_list_t **data) { alpm_list_t *deps = NULL; @@ -601,6 +642,11 @@ int _alpm_sync_prepare(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t *dbs_sync goto cleanup; } } + for(i = list; i; i = i->next) { + /* update download size field */ + pmpkg_t *spkg = i->data; + compute_download_size(spkg); + } cleanup: alpm_list_free(list); @@ -609,86 +655,14 @@ cleanup: return(ret); } -/** Returns a list of deltas that should be downloaded instead of the - * package. - * - * It first tests if a delta path exists between the currently installed - * version (if any) and the version to upgrade to. If so, the delta path - * is used if its size is below a set percentage (MAX_DELTA_RATIO) of - * the package size, Otherwise, an empty list is returned. - * - * @param newpkg the new package to upgrade to - * @param db_local the local database - * - * @return the list of pmdelta_t * objects. NULL (the empty list) is - * returned if the package should be downloaded instead of deltas. - */ -static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) -{ - pmpkg_t *oldpkg = alpm_db_get_pkg(db_local, newpkg->name); - alpm_list_t *ret = NULL; - - if(oldpkg) { - const char *oldname = alpm_pkg_get_filename(oldpkg); - char *oldpath = _alpm_filecache_find(oldname); - - if(oldpath) { - alpm_list_t *deltas = _alpm_shortest_delta_path( - alpm_pkg_get_deltas(newpkg), - alpm_pkg_get_version(oldpkg), - alpm_pkg_get_version(newpkg)); - - if(deltas) { - unsigned long dltsize = _alpm_delta_path_size(deltas); - unsigned long pkgsize = alpm_pkg_get_size(newpkg); - - if(dltsize < pkgsize * MAX_DELTA_RATIO) { - ret = deltas; - } else { - ret = NULL; - alpm_list_free(deltas); - } - } - - FREE(oldpath); - } - } - - return(ret); -} - /** Returns the size of the files that will be downloaded to install a * package. - * * @param newpkg the new package to upgrade to - * @param db_local the local database - * * @return the size of the download */ -unsigned long SYMEXPORT alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local) +unsigned long SYMEXPORT alpm_pkg_download_size(pmpkg_t *newpkg) { - char *fpath = _alpm_filecache_find(alpm_pkg_get_filename(newpkg)); - unsigned long size = 0; - - if(fpath) { - size = 0; - } else if(handle->usedelta) { - alpm_list_t *deltas = pkg_upgrade_delta_path(newpkg, db_local); - - if(deltas) { - size = _alpm_delta_path_size_uncached(deltas); - } else { - size = alpm_pkg_get_size(newpkg); - } - - alpm_list_free(deltas); - } else { - size = alpm_pkg_get_size(newpkg); - } - - FREE(fpath); - - return(size); + return(newpkg->download_size); } /** Applies delta files to create an upgraded package file. @@ -849,46 +823,35 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) EVENT(trans, PM_TRANS_EVT_PRINTURI, (char *)alpm_db_get_url(current), (char *)fname); } else { - char *fpath = _alpm_filecache_find(fname); - if(!fpath) { - if(handle->usedelta) { - alpm_list_t *delta_path = pkg_upgrade_delta_path(spkg, db_local); - - if(delta_path) { - alpm_list_t *dlts = NULL; - - for(dlts = delta_path; dlts; dlts = alpm_list_next(dlts)) { - pmdelta_t *d = (pmdelta_t *)alpm_list_getdata(dlts); - char *fpath2 = _alpm_filecache_find(d->delta); - - if(!fpath2) { - /* add the delta filename to the download list if - * it's not in the cache */ - files = alpm_list_add(files, strdup(d->delta)); - } - - /* save the package and delta so that the xdelta patch - * command can be run after the downloads finish */ - patches = alpm_list_add(patches, spkg); - patches = alpm_list_add(patches, d); - - /* keep a list of the delta files for md5sums */ - deltas = alpm_list_add(deltas, d); + if(spkg->download_size != 0) { + alpm_list_t *delta_path = spkg->delta_path; + if(delta_path) { + alpm_list_t *dlts = NULL; + + for(dlts = delta_path; dlts; dlts = alpm_list_next(dlts)) { + pmdelta_t *d = (pmdelta_t *)alpm_list_getdata(dlts); + char *fpath2 = _alpm_filecache_find(d->delta); + + if(!fpath2) { + /* add the delta filename to the download list if + * it's not in the cache */ + files = alpm_list_add(files, strdup(d->delta)); } - alpm_list_free(delta_path); - delta_path = NULL; - } else { - /* no deltas to download, so add the file to the - * download list */ - files = alpm_list_add(files, strdup(fname)); + /* save the package and delta so that the xdelta patch + * command can be run after the downloads finish */ + patches = alpm_list_add(patches, spkg); + patches = alpm_list_add(patches, d); + + /* keep a list of the delta files for md5sums */ + deltas = alpm_list_add(deltas, d); } + } else { /* not using deltas, so add the file to the download list */ files = alpm_list_add(files, strdup(fname)); } } - FREE(fpath); } } } diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 9f431ed7..a7a6573a 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -545,8 +545,8 @@ int _alpm_str_cmp(const void *s1, const void *s2) return(strcmp(s1, s2)); } -/** Find a package file in an alpm cachedir. - * @param filename name of package file to find +/** Find a filename in a registered alpm cachedir. + * @param filename name of file to find * @return malloced path of file, NULL if not found */ char *_alpm_filecache_find(const char* filename) |