diff options
| author | Dan McGee <dan@archlinux.org> | 2010-12-29 18:43:44 -0600 | 
|---|---|---|
| committer | Dan McGee <dan@archlinux.org> | 2010-12-29 18:43:44 -0600 | 
| commit | a58083459b096e935693d94b9cb51a447b3a1abd (patch) | |
| tree | e21857a8cc0b3dbe562cfe015571280c41023729 /lib/libalpm | |
| parent | 126f50ab0b5ee3ed46c5a6ecae241e8af49b0fe2 (diff) | |
| parent | e3c19569cfe7cd77674490b30624e71512417e0b (diff) | |
Merge branch 'fgets-perf'
Diffstat (limited to 'lib/libalpm')
| -rw-r--r-- | lib/libalpm/be_package.c | 13 | ||||
| -rw-r--r-- | lib/libalpm/be_sync.c | 15 | ||||
| -rw-r--r-- | lib/libalpm/util.c | 100 | ||||
| -rw-r--r-- | lib/libalpm/util.h | 18 | 
4 files changed, 114 insertions, 32 deletions
| diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c index f9f18d38..df5b28d1 100644 --- a/lib/libalpm/be_package.c +++ b/lib/libalpm/be_package.c @@ -155,17 +155,22 @@ static struct pkg_operations *get_file_pkg_ops(void)   */  static int parse_descfile(struct archive *a, pmpkg_t *newpkg)  { -	char line[PATH_MAX];  	char *ptr = NULL;  	char *key = NULL;  	int linenum = 0; +	struct archive_read_buffer buf;  	ALPM_LOG_FUNC; -	/* loop until we reach EOF (where archive_fgets will return NULL) */ -	while(_alpm_archive_fgets(line, PATH_MAX, a) != NULL) { +	memset(&buf, 0, sizeof(buf)); +	/* 512K for a line length seems reasonable */ +	buf.max_line_size = 512 * 1024; + +	/* loop until we reach EOF or other error */ +	while(_alpm_archive_fgets(a, &buf) == ARCHIVE_OK) { +		char *line = _alpm_strtrim(buf.line); +  		linenum++; -		_alpm_strtrim(line);  		if(strlen(line) == 0 || line[0] == '#') {  			continue;  		} diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 137fc1b2..d0f98c20 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -219,8 +219,8 @@ static int sync_db_populate(pmdb_t *db)  }  #define READ_NEXT(s) do { \ -	if(_alpm_archive_fgets(s, sizeof(s), archive) == NULL) goto error; \ -	_alpm_strtrim(s); \ +	if(_alpm_archive_fgets(archive, &buf) != ARCHIVE_OK) goto error; \ +	s = _alpm_strtrim(buf.line); \  } while(0)  #define READ_AND_STORE(f) do { \ @@ -238,10 +238,10 @@ static int sync_db_populate(pmdb_t *db)  static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entry *entry)  { -	char line[1024];  	const char *entryname = NULL;  	char *filename, *pkgname, *p, *q;  	pmpkg_t *pkg; +	struct archive_read_buffer buf;  	ALPM_LOG_FUNC; @@ -260,6 +260,10 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr  	_alpm_log(PM_LOG_FUNCTION, "loading package data from archive entry %s\n",  			entryname); +	memset(&buf, 0, sizeof(buf)); +	/* 512K for a line length seems reasonable */ +	buf.max_line_size = 512 * 1024; +  	/* get package and db file names */  	STRDUP(pkgname, entryname, RET_ERR(PM_ERR_MEMORY, -1));  	p = pkgname + strlen(pkgname); @@ -279,8 +283,9 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr  	if(strcmp(filename, "desc") == 0 || strcmp(filename, "depends") == 0  			|| strcmp(filename, "deltas") == 0) { -		while(_alpm_archive_fgets(line, sizeof(line), archive) != NULL) { -			_alpm_strtrim(line); +		while(_alpm_archive_fgets(archive, &buf) == ARCHIVE_OK) { +			char *line = _alpm_strtrim(buf.line); +  			if(strcmp(line, "%NAME%") == 0) {  				READ_NEXT(line);  				if(strcmp(line, pkg->name) != 0) { diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 1291ea0f..d34eab5e 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -771,33 +771,89 @@ int _alpm_test_md5sum(const char *filepath, const char *md5sum)  	return(ret);  } -char *_alpm_archive_fgets(char *line, size_t size, struct archive *a) +/* Note: does NOT handle sparse files on purpose for speed. */ +int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b)  { -	/* for now, just read one char at a time until we get to a -	 * '\n' char. we can optimize this later with an internal -	 * buffer. */ -	/* leave room for zero terminator */ -	char *last = line + size - 1; -	char *i; - -	for(i = line; i < last; i++) { -		int ret = archive_read_data(a, i, 1); -		/* special check for first read- if null, return null, -		 * this indicates EOF */ -		if(i == line && (ret <= 0 || *i == '\0')) { -			return(NULL); +	char *i = NULL; +	int64_t offset; +	int done = 0; + +	while(1) { +		/* have we processed this entire block? */ +		if(b->block + b->block_size == b->block_offset) { +			if(b->ret == ARCHIVE_EOF) { +				/* reached end of archive on the last read, now we are out of data */ +				goto cleanup; +			} + +			/* zero-copy - this is the entire next block of data. */ +			b->ret = archive_read_data_block(a, (void*)&b->block, +					&b->block_size, &offset); +			b->block_offset = b->block; + +			/* error or end of archive with no data read, cleanup */ +			if(b->ret < ARCHIVE_OK || +					(b->block_size == 0 && b->ret == ARCHIVE_EOF)) { +				goto cleanup; +			}  		} -		/* check if read value was null or newline */ -		if(ret <= 0 || *i == '\0' || *i == '\n') { -			last = i + 1; -			break; + +		/* loop through the block looking for EOL characters */ +		for(i = b->block_offset; i < (b->block + b->block_size); i++) { +			/* check if read value was null or newline */ +			if(*i == '\0' || *i == '\n') { +				done = 1; +				break; +			}  		} -	} -	/* always null terminate the buffer */ -	*last = '\0'; +		/* allocate our buffer, or ensure our existing one is big enough */ +		if(!b->line) { +			/* set the initial buffer to the read block_size */ +			CALLOC(b->line, b->block_size + 1, sizeof(char), +					RET_ERR(PM_ERR_MEMORY, -1)); +			b->line_size = b->block_size + 1; +			b->line_offset = b->line; +		} else { +			size_t needed = (b->line_offset - b->line) + (i - b->block_offset) + 1; +			if(needed > b->max_line_size) { +				RET_ERR(PM_ERR_MEMORY, -1); +			} +			if(needed > b->line_size) { +				/* need to realloc + copy data to fit total length */ +				char *new; +				CALLOC(new, needed, sizeof(char), RET_ERR(PM_ERR_MEMORY, -1)); +				memcpy(new, b->line, b->line_size); +				b->line_size = needed; +				b->line_offset = new + (b->line_offset - b->line); +				free(b->line); +				b->line = new; +			} +		} + +		if(done) { +			size_t len = i - b->block_offset; +			memcpy(b->line_offset, b->block_offset, len); +			b->line_offset[len] = '\0'; +			b->block_offset = ++i; +			/* this is the main return point; from here you can read b->line */ +			return(ARCHIVE_OK); +		} else { +			/* we've looked through the whole block but no newline, copy it */ +			size_t len = b->block + b->block_size - b->block_offset; +			memcpy(b->line_offset, b->block_offset, len); +			b->line_offset += len; +			b->block_offset = i; +		} +	} -	return(line); +cleanup: +	{ +		int ret = b->ret; +		FREE(b->line); +		memset(b, 0, sizeof(b)); +		return(ret); +	}  }  int _alpm_splitname(const char *target, pmpkg_t *pkg) diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h index 5464b239..543643b1 100644 --- a/lib/libalpm/util.h +++ b/lib/libalpm/util.h @@ -59,6 +59,22 @@  	_alpm_log(PM_LOG_DEBUG, "returning error %d from %s : %s\n", err, __func__, alpm_strerrorlast()); \  	return(ret); } while(0) +/** + * Used as a buffer/state holder for _alpm_archive_fgets(). + */ +struct archive_read_buffer { +	char *line; +	char *line_offset; +	size_t line_size; +	size_t max_line_size; + +	char *block; +	char *block_offset; +	size_t block_size; + +	int ret; +}; +  int _alpm_makepath(const char *path);  int _alpm_makepath_mode(const char *path, mode_t mode);  int _alpm_copyfile(const char *src, const char *dest); @@ -76,7 +92,7 @@ char *_alpm_filecache_find(const char *filename);  const char *_alpm_filecache_setup(void);  int _alpm_lstat(const char *path, struct stat *buf);  int _alpm_test_md5sum(const char *filepath, const char *md5sum); -char *_alpm_archive_fgets(char *line, size_t size, struct archive *a); +int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b);  int _alpm_splitname(const char *target, pmpkg_t *pkg);  unsigned long _alpm_hash_sdbm(const char *str); | 
