Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/sas/readstat_sas.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,12 @@ typedef struct sas_text_ref_s {
#define SAS_PAGE_HEADER_SIZE_32BIT 24
#define SAS_PAGE_HEADER_SIZE_64BIT 40

#define SAS_COMPRESSION_NONE 0x00
#define SAS_COMPRESSION_TRUNC 0x01
#define SAS_COMPRESSION_ROW 0x04
/* Values of the `compression` field of a parsed subheader pointer, as tested
 * by the page parsers in readstat_sas7bdat_read.c. The notes below describe
 * only how that code treats each value. */
#define SAS_COMPRESSION_NONE 0x00
/* Pointers carrying this value are skipped by both parsing passes. */
#define SAS_COMPRESSION_TRUNC 0x01
/* Skipped in pass 1. In pass 2 the pointer's offset and len fields are read
 * as (page index + 1) and (subheader index + 1) of another subheader, which
 * is then loaded via sas7bdat_parse_moved_row(). */
#define SAS_COMPRESSION_MOVED 0x03
/* Accepted without action in pass 1; presumably marks a compressed data row
 * that pass 2 decompresses -- TODO confirm against the full pass-2 code. */
#define SAS_COMPRESSION_ROW 0x04
/* Ignored when met while walking a page in either pass; required on the
 * subheader that a SAS_COMPRESSION_MOVED pointer refers to. */
#define SAS_COMPRESSION_ROW_MOVED 0x06
/* Accepted and ignored by both passes. NOTE(review): the name suggests the
 * meaning of this value is not known -- confirm before relying on it. */
#define SAS_COMPRESSION_MYSTERY 0x0d

#define SAS_COMPRESSION_SIGNATURE_RLE "SASYZCRL"
#define SAS_COMPRESSION_SIGNATURE_RDC "SASYZCR2"
Expand Down
108 changes: 105 additions & 3 deletions src/sas/readstat_sas7bdat_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ typedef struct sas7bdat_ctx_s {
readstat_io_t *io;
int bswap;
int did_submit_columns;
int requires_seek;

uint32_t row_length;
uint32_t page_row_count;
Expand Down Expand Up @@ -880,7 +881,7 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
if ((retval = sas7bdat_parse_subheader_pointer(shp, page + page_size - shp, &shp_info, ctx)) != READSTAT_OK) {
goto cleanup;
}
if (shp_info.len > 0 && shp_info.compression != SAS_COMPRESSION_TRUNC) {
if (shp_info.len > 0 && shp_info.compression != SAS_COMPRESSION_TRUNC && shp_info.compression != SAS_COMPRESSION_MOVED) {
if ((retval = sas7bdat_validate_subheader_pointer(&shp_info, page_size, subheader_count, ctx)) != READSTAT_OK) {
goto cleanup;
}
Expand All @@ -895,7 +896,8 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
goto cleanup;
}
}
} else if (shp_info.compression == SAS_COMPRESSION_ROW) {
} else if (shp_info.compression == SAS_COMPRESSION_ROW || shp_info.compression == SAS_COMPRESSION_ROW_MOVED ||
shp_info.compression == SAS_COMPRESSION_MYSTERY) {
/* void */
} else {
retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION;
Expand All @@ -911,6 +913,78 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
return retval;
}

/*
 * Read one page from the file and parse a single compressed subheader on it.
 *
 * The page is addressed as header_size + page_index * page_size. It is read
 * into a temporary buffer that is released before returning; ctx->page is
 * not touched. The subheader pointer at position subheader_index must carry
 * SAS_COMPRESSION_ROW_MOVED; its payload is handed to
 * sas7bdat_parse_subheader_compressed().
 *
 * Side effect: ctx->requires_seek is set to 1 because the I/O position is
 * changed here. The sequential page loop checks this flag and re-seeks
 * before its next read.
 *
 * page_index      zero-based index of the page to load; the caller is
 *                 expected to have checked it against the page count
 * subheader_index zero-based index of the subheader pointer on that page
 * ctx             parser context (I/O handle, page geometry, error handler)
 *
 * Returns READSTAT_OK on success, otherwise:
 *   READSTAT_ERROR_SEEK                     the seek failed (also reported
 *                                           through ctx->handle.error)
 *   READSTAT_ERROR_MALLOC                   the page buffer allocation failed
 *   READSTAT_ERROR_READ                     read error or short read, the
 *                                           page is a data/compressed page,
 *                                           or subheader_index is out of range
 *   READSTAT_ERROR_UNSUPPORTED_COMPRESSION  the target pointer is not
 *                                           SAS_COMPRESSION_ROW_MOVED
 *   or any error returned by the helpers called below.
 */
static readstat_error_t sas7bdat_parse_moved_row(uint64_t page_index, uint64_t subheader_index, sas7bdat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;

    const size_t page_size = ctx->page_size;
    const uint64_t page_offset = ctx->header_size + page_index * page_size;
    char *page = NULL;

    /* Declared up front so that no `goto cleanup` jumps over an initializer. */
    uint16_t page_type = 0;
    uint16_t subheader_count = 0;
    uint64_t shp_offset = 0;
    const char *shp = NULL;
    subheader_pointer_t shp_info = { 0 };

    /* Mark the stream position as disturbed before moving it. */
    ctx->requires_seek = 1;
    if (io->seek(page_offset, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (ctx->handle.error) {
            /* Every argument is cast to int64_t so that it matches its
             * PRId64 conversion regardless of the platform's size_t width. */
            snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Failed to seek to position %" PRId64
                    " (= %" PRId64 " + %" PRId64 "*%" PRId64 ")",
                    (int64_t)page_offset, (int64_t)ctx->header_size, (int64_t)page_index, (int64_t)page_size);
            ctx->handle.error(ctx->error_buf, ctx->user_ctx);
        }
        goto cleanup;
    }
    if ((page = readstat_malloc(page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    /* Compare as signed values: if the handler returns -1 on error, comparing
     * against an unsigned size_t would convert -1 to a huge value and the
     * failure would go unnoticed. */
    if (io->read(page, page_size, io->io_ctx) < (int64_t)page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    /* The page type sits 8 bytes, and the subheader count 4 bytes, before
     * the end of the page header. */
    page_type = sas_read2(&page[ctx->page_header_size - 8], ctx->bswap);
    if ((page_type & SAS_PAGE_TYPE_MASK) == SAS_PAGE_TYPE_DATA || page_type & SAS_PAGE_TYPE_COMP) {
        /* Pages of these types are rejected as targets of a moved row. */
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    subheader_count = sas_read2(&page[ctx->page_header_size - 4], ctx->bswap);
    if (subheader_index >= subheader_count) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    /* Subheader pointers are laid out back to back after the page header;
     * make sure the requested one lies inside the buffer. */
    shp_offset = ctx->page_header_size + subheader_index * ctx->subheader_pointer_size;
    if (shp_offset + ctx->subheader_pointer_size >= page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    shp = &page[shp_offset];
    if ((retval = sas7bdat_parse_subheader_pointer(shp, page + page_size - shp, &shp_info, ctx)) != READSTAT_OK) {
        goto cleanup;
    }
    if ((retval = sas7bdat_validate_subheader_pointer(&shp_info, page_size, subheader_count, ctx)) != READSTAT_OK) {
        goto cleanup;
    }
    if (shp_info.compression != SAS_COMPRESSION_ROW_MOVED) {
        retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION;
        goto cleanup;
    }

    if ((retval = sas7bdat_submit_columns_if_needed(ctx, 1)) != READSTAT_OK) {
        goto cleanup;
    }
    if ((retval = sas7bdat_parse_subheader_compressed(page + shp_info.offset, shp_info.len, ctx)) != READSTAT_OK) {
        goto cleanup;
    }

cleanup:
    /* free(NULL) is a no-op, so no guard is needed. */
    free(page);

    return retval;
}

static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_size, sas7bdat_ctx_t *ctx) {
uint16_t page_type;

Expand Down Expand Up @@ -941,7 +1015,20 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
if ((retval = sas7bdat_parse_subheader_pointer(shp, page + page_size - shp, &shp_info, ctx)) != READSTAT_OK) {
goto cleanup;
}
if (shp_info.len > 0 && shp_info.compression != SAS_COMPRESSION_TRUNC) {
if (shp_info.len == 0 || shp_info.compression == SAS_COMPRESSION_TRUNC) {
/* void */
}
else if (shp_info.compression == SAS_COMPRESSION_MOVED) {
uint64_t page_index = shp_info.offset - 1;
uint64_t subheader_index = shp_info.len - 1;
if (page_index >= ctx->page_count) {
retval = READSTAT_ERROR_PARSE;
goto cleanup;
}
if ((retval = sas7bdat_parse_moved_row(page_index, subheader_index, ctx)) != READSTAT_OK) {
goto cleanup;
}
} else {
if ((retval = sas7bdat_validate_subheader_pointer(&shp_info, page_size, subheader_count, ctx)) != READSTAT_OK) {
goto cleanup;
}
Expand Down Expand Up @@ -975,6 +1062,8 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
if ((retval = sas7bdat_parse_subheader_compressed(page + shp_info.offset, shp_info.len, ctx)) != READSTAT_OK) {
goto cleanup;
}
} else if (shp_info.compression == SAS_COMPRESSION_ROW_MOVED || shp_info.compression == SAS_COMPRESSION_MYSTERY) {
/* void */
} else {
retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION;
goto cleanup;
Expand Down Expand Up @@ -1148,6 +1237,19 @@ static readstat_error_t sas7bdat_parse_all_pages_pass2(sas7bdat_ctx_t *ctx) {
if ((retval = sas7bdat_update_progress(ctx)) != READSTAT_OK) {
goto cleanup;
}
if (ctx->requires_seek) {
if (io->seek(ctx->header_size + i * ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
retval = READSTAT_ERROR_SEEK;
if (ctx->handle.error) {
snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Failed to seek to position %" PRId64
" (= %" PRId64 " + %" PRId64 "*%" PRId64 ")",
ctx->header_size + i * ctx->page_size, ctx->header_size, i, ctx->page_size);
ctx->handle.error(ctx->error_buf, ctx->user_ctx);
}
goto cleanup;
}
ctx->requires_seek = 0;
}
if (io->read(ctx->page, ctx->page_size, io->io_ctx) < ctx->page_size) {
retval = READSTAT_ERROR_READ;
goto cleanup;
Expand Down