From 701a03dcdb113e92b4f8de52a7a427dfdfc3dd27 Mon Sep 17 00:00:00 2001 From: Chantry Xavier Date: Sat, 16 Feb 2008 10:47:22 -0600 Subject: Completely rework delta algorithm Using the graph structures that Nagy set up for dependency sorting, we now do a similar process for deltas. Load up all of the deltas into a graph object on which we can then apply Dijkstra's algorithm, using the new weight field of graph struct. We initialize the nodes weight using the base files that we can use in our filecache (both filename and md5sum must match). The algorithm then picks the best path among those that can be resolved. Note that this algorithm has a few advantages over the old one: 1. It is completely file agnostic. These delta chains do not have to consist of package files- this could be adopted to do delta-fied DBs. 2. It does not use the local_db anymore, or even care if a package or file is currently installed. Instead, it only looks in the filecache for files and packages that match delta chain entries. Original-work-by: Dan McGee Signed-off-by: Chantry Xavier --- lib/libalpm/alpm.h | 2 +- lib/libalpm/delta.c | 232 +++++++++++++++++++++++++++++--------------------- lib/libalpm/delta.h | 8 +- lib/libalpm/graph.h | 1 + lib/libalpm/package.c | 1 + lib/libalpm/package.h | 2 + lib/libalpm/sync.c | 175 +++++++++++++++---------------------- lib/libalpm/util.c | 4 +- src/pacman/util.c | 2 +- 9 files changed, 218 insertions(+), 209 deletions(-) diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index e5445d0d..288426d6 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -222,7 +222,7 @@ size_t alpm_pkg_changelog_read(void *ptr, size_t size, int alpm_pkg_changelog_close(const pmpkg_t *pkg, void *fp); unsigned short alpm_pkg_has_scriptlet(pmpkg_t *pkg); -unsigned long alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local); +unsigned long alpm_pkg_download_size(pmpkg_t *newpkg); /* * Deltas diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c index 12fb9bd7..fdb4d99b 100644 --- a/lib/libalpm/delta.c +++ b/lib/libalpm/delta.c @@ -21,13 +21,14 @@ #include #include +#include /* libalpm */ #include "delta.h" +#include "alpm_list.h" #include "util.h" #include "log.h" -#include "alpm_list.h" -#include "alpm.h" +#include "graph.h" /** \addtogroup alpm_deltas Delta Functions * @brief Functions to manipulate libalpm deltas @@ -78,129 +79,170 @@ unsigned long SYMEXPORT alpm_delta_get_size(pmdelta_t *delta) /** @} */ -/** Calculates the combined size of a list of delta files. - * @param deltas the list of pmdelta_t * objects - * @return the combined size - */ -unsigned long _alpm_delta_path_size(alpm_list_t *deltas) +static alpm_list_t *delta_graph_init(alpm_list_t *deltas) { - unsigned long sum = 0; - alpm_list_t *dlts = deltas; - - while(dlts) { - pmdelta_t *d = alpm_list_getdata(dlts); - sum += d->delta_size; + alpm_list_t *i, *j; + alpm_list_t *vertices = NULL; + /* create the vertices */ + for(i = deltas; i; i = i->next) { + char *fpath, *md5sum; + pmgraph_t *v = _alpm_graph_new(); + pmdelta_t *vdelta = i->data; + vdelta->download_size = vdelta->delta_size; + v->weight = ULONG_MAX; + + /* determine whether the delta file already exists */ + fpath = _alpm_filecache_find(vdelta->delta); + md5sum = alpm_get_md5sum(fpath); + if(fpath && md5sum && strcmp(md5sum, vdelta->delta_md5) == 0) { + vdelta->download_size = 0; + } + FREE(fpath); + FREE(md5sum); + + /* determine whether a base 'from' file exists */ + fpath = _alpm_filecache_find(vdelta->from); + md5sum = alpm_get_md5sum(fpath); + if(fpath && md5sum && strcmp(md5sum, vdelta->from_md5) == 0) { + v->weight = vdelta->download_size; + } + FREE(fpath); + FREE(md5sum); - dlts = alpm_list_next(dlts); + v->data = vdelta; + vertices = alpm_list_add(vertices, v); } - return(sum); + /* compute the edges */ + for(i = vertices; i; i = i->next) { + pmgraph_t *v_i = i->data; + pmdelta_t *d_i = v_i->data; + /* loop a second time so we make all possible comparisons */ + for(j = vertices; j; j = j->next) { + pmgraph_t *v_j = j->data; + pmdelta_t *d_j = v_j->data; + /* We want to create a delta tree like the following: + * 1_to_2 + * | + * 1_to_3 2_to_3 + * \ / + * 3_to_4 + * If J 'from' is equal to I 'to', then J is a child of I. + * */ + if(strcmp(d_j->from, d_i->to) == 0 + && strcmp(d_j->from_md5, d_i->to_md5) == 0) { + v_i->children = alpm_list_add(v_i->children, v_j); + } + } + v_i->childptr = v_i->children; + } + return(vertices); } -/** Calculates the combined size of a list of delta files that are not - * in the cache. - * @param deltas the list of pmdelta_t * objects - * @return the combined size - */ -unsigned long _alpm_delta_path_size_uncached(alpm_list_t *deltas) -{ - unsigned long sum = 0; - alpm_list_t *dlts = deltas; - - while(dlts) { - pmdelta_t *d = alpm_list_getdata(dlts); - char *fname = _alpm_filecache_find(d->delta); +static unsigned long delta_vert(alpm_list_t *vertices, + const char *to, const char *to_md5, alpm_list_t **path) { + alpm_list_t *i; + pmgraph_t *v; + while(1) { + v = NULL; + /* find the smallest vertice not visited yet */ + for(i = vertices; i; i = i->next) { + pmgraph_t *v_i = i->data; + + if(v_i->state == -1) { + continue; + } - if(!fname) { - sum += d->delta_size; + if(v == NULL || v_i->weight < v->weight) { + v = v_i; + } + } + if(v == NULL || v->weight == ULONG_MAX) { + break; } - FREE(fname); - - dlts = alpm_list_next(dlts); - } + v->state = -1; - return(sum); -} + v->childptr = v->children; + while(v->childptr) { + pmgraph_t *v_c = v->childptr->data; + pmdelta_t *d_c = v_c->data; + if(v_c->weight > v->weight + d_c->download_size) { + v_c->weight = v->weight + d_c->download_size; + v_c->parent = v; + } -/** Calculates the shortest path from one version to another. - * The shortest path is defined as the path with the smallest combined - * size, not the length of the path. - * The algorithm is based on Dijkstra's shortest path algorithm. - * @param deltas the list of pmdelta_t * objects that a package has - * @param from the version to start from - * @param to the version to end at - * @param path the current path - * @return the list of pmdelta_t * objects that has the smallest size. - * NULL (the empty list) is returned if there is no path between the - * versions. - */ -static alpm_list_t *shortest_delta_path(alpm_list_t *deltas, - const char *from, const char *to, alpm_list_t *path) -{ - alpm_list_t *d; - alpm_list_t *shortest = NULL; + v->childptr = (v->childptr)->next; - /* Found the 'to' version, this is a good path so return it. */ - if(strcmp(from, to) == 0) { - return(path); + } } - for(d = deltas; d; d = alpm_list_next(d)) { - pmdelta_t *v = alpm_list_getdata(d); + v = NULL; + unsigned long bestsize = 0; - /* If this vertex has already been visited in the path, go to the - * next vertex. */ - if(alpm_list_find_ptr(path, v)) { - continue; - } + for(i = vertices; i; i = i->next) { + pmgraph_t *v_i = i->data; + pmdelta_t *d_i = v_i->data; - /* Once we find a vertex that starts at the 'from' version, - * recursively find the shortest path using the 'to' version of this - * current vertex as the 'from' version in the function call. */ - if(strcmp(v->from, from) == 0) { - alpm_list_t *newpath = alpm_list_copy(path); - newpath = alpm_list_add(newpath, v); - newpath = shortest_delta_path(deltas, v->to, to, newpath); - - if(newpath != NULL) { - /* The path returned works, now use it unless there is already a - * shorter path found. */ - if(shortest == NULL) { - shortest = newpath; - } else if(_alpm_delta_path_size(shortest) > _alpm_delta_path_size(newpath)) { - alpm_list_free(shortest); - shortest = newpath; - } else { - alpm_list_free(newpath); - } + if(strcmp(d_i->to, to) == 0 + || strcmp(d_i->to_md5, to_md5) == 0) { + if(v == NULL || v_i->weight < v->weight) { + v = v_i; + bestsize = v->weight; } } } - alpm_list_free(path); + alpm_list_t *rpath = NULL; + while(v != NULL) { + pmdelta_t *vdelta = v->data; + rpath = alpm_list_add(rpath, vdelta); + v = v->parent; + } + *path = alpm_list_reverse(rpath); + alpm_list_free(rpath); - return(shortest); + return(bestsize); } /** Calculates the shortest path from one version to another. * The shortest path is defined as the path with the smallest combined * size, not the length of the path. - * @param deltas the list of pmdelta_t * objects that a package has - * @param from the version to start from - * @param to the version to end at - * @return the list of pmdelta_t * objects that has the smallest size. - * NULL (the empty list) is returned if there is no path between the - * versions. + * @param deltas the list of pmdelta_t * objects that a file has + * @param to the file to start the search at + * @param to_md5 the md5sum of the above named file + * @param path the pointer to a list location where pmdelta_t * objects that + * have the smallest size are placed. NULL is set if there is no path + * possible with the files available. + * @return the size of the path stored, or ULONG_MAX if path is unfindable */ -alpm_list_t *_alpm_shortest_delta_path(alpm_list_t *deltas, const char *from, - const char *to) +unsigned long _alpm_shortest_delta_path(alpm_list_t *deltas, + const char *to, const char *to_md5, alpm_list_t **path) { - alpm_list_t *path = NULL; + alpm_list_t *bestpath = NULL; + alpm_list_t *vertices; + unsigned long bestsize = ULONG_MAX; + + ALPM_LOG_FUNC; + + if(deltas == NULL) { + *path = NULL; + return(bestsize); + } + + _alpm_log(PM_LOG_DEBUG, "started delta shortest-path search\n"); + + vertices = delta_graph_init(deltas); + + bestsize = delta_vert(vertices, to, to_md5, &bestpath); + + _alpm_log(PM_LOG_DEBUG, "delta shortest-path search complete\n"); - path = shortest_delta_path(deltas, from, to, path); + alpm_list_free_inner(vertices, _alpm_graph_free); + alpm_list_free(vertices); - return(path); + *path = bestpath; + return(bestsize); } /** Parses the string representation of a pmdelta_t object. diff --git a/lib/libalpm/delta.h b/lib/libalpm/delta.h index a2ac5f05..33d47e1e 100644 --- a/lib/libalpm/delta.h +++ b/lib/libalpm/delta.h @@ -36,14 +36,14 @@ struct __pmdelta_t { char *delta_md5; /** filesize of the delta file */ unsigned long delta_size; + /** download filesize of the delta file */ + unsigned long download_size; }; -unsigned long _alpm_delta_path_size(alpm_list_t *deltas); -unsigned long _alpm_delta_path_size_uncached(alpm_list_t *deltas); pmdelta_t *_alpm_delta_parse(char *line); void _alpm_delta_free(pmdelta_t *delta); -alpm_list_t *_alpm_shortest_delta_path(alpm_list_t *deltas, - const char *from, const char *to); +unsigned long _alpm_shortest_delta_path(alpm_list_t *deltas, + const char *to, const char *to_md5, alpm_list_t **path); #endif /* _ALPM_DELTA_H */ diff --git a/lib/libalpm/graph.h b/lib/libalpm/graph.h index e3e40023..3078e25f 100644 --- a/lib/libalpm/graph.h +++ b/lib/libalpm/graph.h @@ -24,6 +24,7 @@ struct __pmgraph_t { char state; /* 0: untouched, -1: entered, other: leaving time */ void *data; + unsigned long int weight; /* weight of the node */ struct __pmgraph_t *parent; /* where did we come from? */ alpm_list_t *children; alpm_list_t *childptr; /* points to a child in children list */ diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index 3277dd09..4baa1588 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -834,6 +834,7 @@ void _alpm_pkg_free(pmpkg_t *pkg) FREELIST(pkg->provides); alpm_list_free_inner(pkg->deltas, (alpm_list_fn_free)_alpm_delta_free); alpm_list_free(pkg->deltas); + alpm_list_free(pkg->delta_path); if(pkg->origin == PKG_FROM_FILE) { FREE(pkg->origin_data.file); diff --git a/lib/libalpm/package.h b/lib/libalpm/package.h index f3de05d6..0ba8aac4 100644 --- a/lib/libalpm/package.h +++ b/lib/libalpm/package.h @@ -70,6 +70,8 @@ struct __pmpkg_t { char *file; } origin_data; pmdbinfrq_t infolevel; + unsigned long download_size; + alpm_list_t *delta_path; }; int _alpm_versioncmp(const char *a, const char *b); diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index 5146a994..3a4169e8 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -380,6 +380,47 @@ static int syncpkg_cmp(const void *s1, const void *s2) return(strcmp(alpm_pkg_get_name(p1), alpm_pkg_get_name(p2))); } +/** Compute the size of the files that will be downloaded to install a + * package. + * @param newpkg the new package to upgrade to + */ +static void compute_download_size(pmpkg_t *newpkg) +{ + char *fpath = _alpm_filecache_find(alpm_pkg_get_filename(newpkg)); + unsigned long size = 0; + + if(fpath) { + FREE(fpath); + size = 0; + } else if(handle->usedelta) { + unsigned long dltsize; + unsigned long pkgsize = alpm_pkg_get_size(newpkg); + + dltsize = _alpm_shortest_delta_path( + alpm_pkg_get_deltas(newpkg), + alpm_pkg_get_filename(newpkg), + alpm_pkg_get_md5sum(newpkg), + &newpkg->delta_path); + + if(newpkg->delta_path && (dltsize < pkgsize * MAX_DELTA_RATIO)) { + _alpm_log(PM_LOG_DEBUG, "using delta size\n"); + size = dltsize; + } else { + _alpm_log(PM_LOG_DEBUG, "using package size\n"); + size = alpm_pkg_get_size(newpkg); + alpm_list_free(newpkg->delta_path); + newpkg->delta_path = NULL; + } + } else { + size = alpm_pkg_get_size(newpkg); + } + + _alpm_log(PM_LOG_DEBUG, "returning size %ld for pkg %s\n", size, + alpm_pkg_get_name(newpkg)); + + newpkg->download_size = size; +} + int _alpm_sync_prepare(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t *dbs_sync, alpm_list_t **data) { alpm_list_t *deps = NULL; @@ -601,6 +642,11 @@ int _alpm_sync_prepare(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t *dbs_sync goto cleanup; } } + for(i = list; i; i = i->next) { + /* update download size field */ + pmpkg_t *spkg = i->data; + compute_download_size(spkg); + } cleanup: alpm_list_free(list); @@ -609,86 +655,14 @@ cleanup: return(ret); } -/** Returns a list of deltas that should be downloaded instead of the - * package. - * - * It first tests if a delta path exists between the currently installed - * version (if any) and the version to upgrade to. If so, the delta path - * is used if its size is below a set percentage (MAX_DELTA_RATIO) of - * the package size, Otherwise, an empty list is returned. - * - * @param newpkg the new package to upgrade to - * @param db_local the local database - * - * @return the list of pmdelta_t * objects. NULL (the empty list) is - * returned if the package should be downloaded instead of deltas. - */ -static alpm_list_t *pkg_upgrade_delta_path(pmpkg_t *newpkg, pmdb_t *db_local) -{ - pmpkg_t *oldpkg = alpm_db_get_pkg(db_local, newpkg->name); - alpm_list_t *ret = NULL; - - if(oldpkg) { - const char *oldname = alpm_pkg_get_filename(oldpkg); - char *oldpath = _alpm_filecache_find(oldname); - - if(oldpath) { - alpm_list_t *deltas = _alpm_shortest_delta_path( - alpm_pkg_get_deltas(newpkg), - alpm_pkg_get_version(oldpkg), - alpm_pkg_get_version(newpkg)); - - if(deltas) { - unsigned long dltsize = _alpm_delta_path_size(deltas); - unsigned long pkgsize = alpm_pkg_get_size(newpkg); - - if(dltsize < pkgsize * MAX_DELTA_RATIO) { - ret = deltas; - } else { - ret = NULL; - alpm_list_free(deltas); - } - } - - FREE(oldpath); - } - } - - return(ret); -} - /** Returns the size of the files that will be downloaded to install a * package. - * * @param newpkg the new package to upgrade to - * @param db_local the local database - * * @return the size of the download */ -unsigned long SYMEXPORT alpm_pkg_download_size(pmpkg_t *newpkg, pmdb_t *db_local) +unsigned long SYMEXPORT alpm_pkg_download_size(pmpkg_t *newpkg) { - char *fpath = _alpm_filecache_find(alpm_pkg_get_filename(newpkg)); - unsigned long size = 0; - - if(fpath) { - size = 0; - } else if(handle->usedelta) { - alpm_list_t *deltas = pkg_upgrade_delta_path(newpkg, db_local); - - if(deltas) { - size = _alpm_delta_path_size_uncached(deltas); - } else { - size = alpm_pkg_get_size(newpkg); - } - - alpm_list_free(deltas); - } else { - size = alpm_pkg_get_size(newpkg); - } - - FREE(fpath); - - return(size); + return(newpkg->download_size); } /** Applies delta files to create an upgraded package file. @@ -849,46 +823,35 @@ int _alpm_sync_commit(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t **data) EVENT(trans, PM_TRANS_EVT_PRINTURI, (char *)alpm_db_get_url(current), (char *)fname); } else { - char *fpath = _alpm_filecache_find(fname); - if(!fpath) { - if(handle->usedelta) { - alpm_list_t *delta_path = pkg_upgrade_delta_path(spkg, db_local); - - if(delta_path) { - alpm_list_t *dlts = NULL; - - for(dlts = delta_path; dlts; dlts = alpm_list_next(dlts)) { - pmdelta_t *d = (pmdelta_t *)alpm_list_getdata(dlts); - char *fpath2 = _alpm_filecache_find(d->delta); - - if(!fpath2) { - /* add the delta filename to the download list if - * it's not in the cache */ - files = alpm_list_add(files, strdup(d->delta)); - } - - /* save the package and delta so that the xdelta patch - * command can be run after the downloads finish */ - patches = alpm_list_add(patches, spkg); - patches = alpm_list_add(patches, d); - - /* keep a list of the delta files for md5sums */ - deltas = alpm_list_add(deltas, d); + if(spkg->download_size != 0) { + alpm_list_t *delta_path = spkg->delta_path; + if(delta_path) { + alpm_list_t *dlts = NULL; + + for(dlts = delta_path; dlts; dlts = alpm_list_next(dlts)) { + pmdelta_t *d = (pmdelta_t *)alpm_list_getdata(dlts); + char *fpath2 = _alpm_filecache_find(d->delta); + + if(!fpath2) { + /* add the delta filename to the download list if + * it's not in the cache */ + files = alpm_list_add(files, strdup(d->delta)); } - alpm_list_free(delta_path); - delta_path = NULL; - } else { - /* no deltas to download, so add the file to the - * download list */ - files = alpm_list_add(files, strdup(fname)); + /* save the package and delta so that the xdelta patch + * command can be run after the downloads finish */ + patches = alpm_list_add(patches, spkg); + patches = alpm_list_add(patches, d); + + /* keep a list of the delta files for md5sums */ + deltas = alpm_list_add(deltas, d); } + } else { /* not using deltas, so add the file to the download list */ files = alpm_list_add(files, strdup(fname)); } } - FREE(fpath); } } } diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 9f431ed7..a7a6573a 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -545,8 +545,8 @@ int _alpm_str_cmp(const void *s1, const void *s2) return(strcmp(s1, s2)); } -/** Find a package file in an alpm cachedir. - * @param filename name of package file to find +/** Find a filename in a registered alpm cachedir. + * @param filename name of file to find * @return malloced path of file, NULL if not found */ char *_alpm_filecache_find(const char* filename) diff --git a/src/pacman/util.c b/src/pacman/util.c index d932b441..9617e6f2 100644 --- a/src/pacman/util.c +++ b/src/pacman/util.c @@ -510,7 +510,7 @@ void display_targets(const alpm_list_t *syncpkgs, pmdb_t *db_local) } dispsize = alpm_pkg_get_size(pkg); - dlsize += alpm_pkg_download_size(pkg, db_local); + dlsize += alpm_pkg_download_size(pkg); isize += alpm_pkg_get_isize(pkg); /* print the package size with the output if ShowSize option set */ -- cgit v1.2.3-70-g09d2