libisofs-legacy/libisofs/ecma119_read.c

746 lines
20 KiB
C

/*
* Functions to read an ISO image.
*/
/*
* TODO
* we need some kind of force option, to continue reading the image on
* minor errors, such as incorrect time stamps.
*
* TODO
* need to check the ZF Linux-specific extension for transparent decompression
* TODO
* find out what the RR entry is for
*/
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "ecma119_read.h"
#include "ecma119_read_rr.h"
#include "ecma119.h"
#include "util.h"
#include "volume.h"
#include "tree.h"
#include "messages.h"
/* Size in bytes of the buffers handed to the data source's read_block(). */
#define BLOCK_SIZE 2048
/*
 * Forward declaration: iso_read_single_directory_record() recurses into
 * iso_read_dir(), which is defined after it in this file.
 */
static int
iso_read_dir(struct iso_read_info *info, struct iso_tree_node_dir *parent,
uint32_t block);
/**
* This reads the "." directory entry, and set the properties of the
* given directory propertly.
*/
static int
iso_read_dot_record(struct iso_read_info *info,
struct iso_tree_node_dir *dir,
struct ecma119_dir_record *record)
{
struct susp_sys_user_entry *sue;
struct susp_iterator *iter;
assert( info && dir && record );
iter = susp_iter_new(info, record);
while ( (sue = susp_iter_next(iter)) ) {
/* ignore entries from different version */
if (sue->version[0] != 1)
continue;
/* we don't care about any RR entry but PX and TF */
if (SUSP_SIG(sue, 'P', 'X')) {
if (read_rr_PX(info, sue, &dir->node.attrib))
break;
} else if (SUSP_SIG(sue, 'T', 'F')) {
if (read_rr_TF(info, sue, &dir->node.attrib))
break;
}
}
susp_iter_free(iter);
if (info->error)
return -1;
return 0;
}
/**
 * Creates a suitable iso_tree_node from a directory record and adds it to
 * \p parent. If the record describes a directory, iso_read_dir() is called
 * recursively to read its children.
 *
 * Ownership: the node name (taken from the Rock Ridge NM entry or, failing
 * that, from the directory record) and, for symlinks, the link destination
 * are handed over to the new node. On every path where no node is created
 * both strings are released here.
 *
 * @return
 *   0 on success. 0 is also returned, without adding any node, for a
 *   record flagged with a Rock Ridge RE entry (a relocated directory,
 *   which is reached through the matching CL entry instead).
 *   -1 on error: a wrong or unsupported Rock Ridge entry, a file type
 *   other than directory / regular file / symlink, an allocation failure,
 *   or a failure while reading a child directory.
 */
static int
iso_read_single_directory_record(struct iso_read_info *info,
                                 struct iso_tree_node_dir *parent,
                                 struct ecma119_dir_record *record)
{
    struct iso_tree_node *node = NULL;
    struct stat atts;
    time_t recorded;
    char *name = NULL;
    char *linkdest = NULL;
    uint32_t relocated_dir = 0;

    assert(info && record && parent);

    memset(&atts, 0, sizeof(atts));

    /*
     * The idea is to read all the RR entries (if we want to do that and RR
     * extensions exist on image), storing the info we want from that.
     * Then, we need some sanity checks.
     * Finally, we select what kind of node it is, and set values properly.
     */
    if (info->rr) {
        struct susp_sys_user_entry *sue;
        struct susp_iterator *iter;

        iter = susp_iter_new(info, record);
        while ( (sue = susp_iter_next(iter)) ) {

            /* ignore entries from different version */
            if (sue->version[0] != 1)
                continue;

            if (SUSP_SIG(sue, 'P', 'X')) {
                if (read_rr_PX(info, sue, &atts))
                    break;
            } else if (SUSP_SIG(sue, 'T', 'F')) {
                if (read_rr_TF(info, sue, &atts))
                    break;
            } else if (SUSP_SIG(sue, 'N', 'M')) {
                name = read_rr_NM(sue, name);
                if (!name) {
                    info->error = LIBISOFS_WRONG_RR;
                    break;
                }
            } else if (SUSP_SIG(sue, 'S', 'L')) {
                linkdest = read_rr_SL(sue, linkdest);
                if (!linkdest) {
                    info->error = LIBISOFS_WRONG_RR;
                    break;
                }
            } else if (SUSP_SIG(sue, 'R', 'E')) {
                /*
                 * This directory entry refers to a relocated directory.
                 * We simply ignore it, as it will be correctly handled
                 * when its CL entry is found. Anything collected so far
                 * is dropped, including a link destination.
                 */
                susp_iter_free(iter);
                free(name);
                free(linkdest);
                return 0; /* is not an error */
            } else if (SUSP_SIG(sue, 'C', 'L')) {
                /*
                 * This entry is a placeholder for a relocated dir.
                 * We need to ignore other entries, with the exception of NM.
                 * Then we create a directory node that represents the
                 * relocated dir, and iterate over its children.
                 */
                relocated_dir = iso_read_bb(sue->data.CL.child_loc, 4, NULL);
            } else if (SUSP_SIG(sue, 'S', 'F')) {
                iso_msg_sorry(LIBISO_RR_UNSUPPORTED, "Sparse files not supported.");
                info->error = LIBISOFS_UNSUPPORTED_IMAGE;
                break;
            } else if (SUSP_SIG(sue, 'R', 'R')) {
                /* TODO I've seen this RR on mkisofs images. what's this? */
                continue;
            } else {
                char msg[28];
                /* bounded: the message can never run past msg */
                snprintf(msg, sizeof(msg), "Unhandled SUSP entry %c%c.",
                         sue->sig[0], sue->sig[1]);
                iso_msg_hint(LIBISO_SUSP_UNHANLED, msg);
            }
        }

        /* PX is mandatory, except on the placeholder of a relocated dir */
        if ( !info->error && !relocated_dir && atts.st_mode == (mode_t) 0 ) {
            iso_msg_sorry(LIBISO_RR_ERROR, "Mandatory Rock Ridge PX entry is "
                          "not present or it contains invalid values.");
            info->error = LIBISOFS_WRONG_RR;
        }

        susp_iter_free(iter);

        if (info->error)
            goto fail;

        //TODO convert name to needed charset!!

    } else {
        /* RR extensions are not read / used */
        atts.st_mode = info->mode;
        atts.st_gid = info->gid;
        atts.st_uid = info->uid;
        if (record->flags[0] & 0x02)
            atts.st_mode |= S_IFDIR;
        else
            atts.st_mode |= S_IFREG;
        atts.st_ino = ++info->ino;
    }

    /*
     * if we haven't RR extensions, or no NM entry is present,
     * we use the name in directory record
     */
    if (!name) {
        size_t len;

        name = info->get_name((char*)record->file_id, record->len_fi[0]);
        if (!name) {
            /*
             * No name could be produced. info->error is left untouched, as
             * no dedicated error code for this case is visible in this file.
             */
            goto fail;
        }

        /* remove trailing version number */
        len = strlen(name);
        if (len > 2 && name[len-2] == ';' && name[len-1] == '1') {
            name[len-2] = '\0';
        }
    }

    /*
     * if we haven't RR extensions, or a needed TF time stamp is not present,
     * we use plain iso recording time
     */
    recorded = iso_datetime_read_7(record->recording_time);
    if ( atts.st_atime == (time_t) 0 ) {
        atts.st_atime = recorded;
    }
    if ( atts.st_ctime == (time_t) 0 ) {
        atts.st_ctime = recorded;
    }
    if ( atts.st_mtime == (time_t) 0 ) {
        atts.st_mtime = recorded;
    }

    /* the size is read from iso directory record */
    atts.st_size = iso_read_bb(record->length, 4, NULL);

    if (relocated_dir) {
        /*
         * Ensure that a placeholder for a relocated dir appears as
         * a directory (mode & S_IFDIR).
         * This is needed because the placeholder is really a file, and
         * in theory PX entry must be ignored.
         * However, to make code clearer, we don't ignore it, because
         * anyway it will be replaced by "." entry when recursing.
         */
        atts.st_mode = S_IFDIR | (atts.st_mode & ~S_IFMT);
    }

    //TODO sanity checks!!

    switch(atts.st_mode & S_IFMT) {
    case S_IFDIR:
        {
            node = calloc(1, sizeof(struct iso_tree_node_dir));
            if (!node)
                goto fail;
            node->type = LIBISO_NODE_DIR;
        }
        break;
    case S_IFREG:
        {
            node = calloc(1, sizeof(struct iso_tree_node_file));
            if (!node)
                goto fail;
            node->type = LIBISO_NODE_FILE;

            /* set block with extent */
            ((struct iso_tree_node_file*)node)->loc.block =
                iso_read_bb(record->block, 4, NULL);
        }
        break;
    case S_IFLNK:
        {
            node = calloc(1, sizeof(struct iso_tree_node_symlink));
            if (!node)
                goto fail;
            node->type = LIBISO_NODE_SYMLINK;

            /* the node takes over the link destination */
            ((struct iso_tree_node_symlink*)node)->dest = linkdest;
            linkdest = NULL;
        }
        break;
    default:
        iso_msg_sorry(LIBISO_RR_UNSUPPORTED, "File type not supported.");
        /* do not let the failure be reported with an "OK" error code */
        info->error = LIBISOFS_UNSUPPORTED_IMAGE;
        goto fail;
    }

    /* an SL entry attached to something that is not a symlink is dropped */
    free(linkdest);
    linkdest = NULL;

    node->name = name;
    node->attrib = atts;
    node->refcount = 1;
    node->procedence = LIBISO_PREVIMG;

    iso_tree_add_child(parent, node);

    if (node->type == LIBISO_NODE_DIR) {
        uint32_t block;

        if (relocated_dir)
            block = relocated_dir;
        else
            block = iso_read_bb(record->block, 4, NULL);

        /* add all children */
        return iso_read_dir(info, (struct iso_tree_node_dir*)node, block);
    }
    return 0;

fail:
    free(name);
    free(linkdest);
    return -1;
}
/**
* Read all directory records in a directory, and creates a node for each
* of them, adding them to \p dir.
*/
static int
iso_read_dir(struct iso_read_info *info, struct iso_tree_node_dir *dir,
uint32_t block)
{
unsigned char buffer[2048];
struct ecma119_dir_record *record;
uint32_t size;
uint32_t pos = 0;
uint32_t tlen = 0;
if ( info->src->read_block(info->src, block, buffer) < 0 ) {
info->error = LIBISOFS_READ_FAILURE;
return -1;
}
/* Attributes of dir are set in the "." entry */
record = (struct ecma119_dir_record *)(buffer + pos);
size = iso_read_bb(record->length, 4, NULL);
if (info->rr)
iso_read_dot_record(info, dir, record);
tlen += record->len_dr[0];
pos += record->len_dr[0];
/* skip ".." */
record = (struct ecma119_dir_record *)(buffer + pos);
tlen += record->len_dr[0];
pos += record->len_dr[0];
while( tlen < size ) {
record = (struct ecma119_dir_record *)(buffer + pos);
if (pos == 2048 || record->len_dr[0] == 0) {
/*
* The directory entries are splitted in several blocks
* read next block
*/
if ( info->src->read_block(info->src, ++block, buffer) < 0 ) {
info->error = LIBISOFS_READ_FAILURE;
return -1;
}
tlen += 2048 - pos;
pos = 0;
/* next block must begin with a non-0 directory record */
assert(buffer[0] != 0);
continue;
}
/*
* What about ignoring files with existence flag?
* if (record->flags[0] & 0x01)
* continue;
*/
/*
* TODO
* For a extrange reason, mkisofs relocates directories under
* a RR_MOVED dir. It seems that it is only used for that purposes,
* and thus it should be removed from the iso tree before
* generating a new image with libisofs, that don't uses it.
* We can do that here, but I think it's a better option doing it
* on an app. using the library, such as genisofs.
*
* if ( record->len_fi[0] == 8 &&
* !strncmp(record->file_id,"RR_MOVED", 8) ) {
* continue;
* }
*/
/* check for unsupported multiextend */
if (record->flags[0] & 0x80) {
iso_msg_fatal(LIBISO_IMG_UNSUPPORTED, "Unsupported image.\n"
"This image makes use of Multi-Extend features, that "
"are not supported at this time.\n"
"If you need support for that, please request us this feature.\n"
"Thank you in advance\n");
info->error = LIBISOFS_UNSUPPORTED_IMAGE;
return -1;
}
/* check for unsupported interleaved mode */
if ( record->file_unit_size[0] || record->interleave_gap_size[0] ) {
iso_msg_fatal(LIBISO_IMG_UNSUPPORTED, "Unsupported image.\n"
"This image has at least one file recorded in "
"interleaved mode.\n"
"We don't support this mode, as we think it's not used.\n"
"If you're reading this, then we're wrong :)\n"
"Please contact libisofs developers, so we can fix this.\n"
"Thank you in advance\n");
info->error = LIBISOFS_UNSUPPORTED_IMAGE;
return -1;
}
//TODO check for unsupported extended attribs?
//TODO check for other flags?
if ( iso_read_single_directory_record(info, dir, record) )
return -1;
tlen += record->len_dr[0];
pos += record->len_dr[0];
}
return 0;
}
/**
* Read the SUSP system user entries of the "." entry of the root directory,
* indentifying when Rock Ridge extensions are being used.
*/
static int
read_root_susp_entries(struct iso_read_info *info,
struct iso_tree_node_dir *root,
uint32_t block)
{
unsigned char buffer[2048];
struct ecma119_dir_record *record;
struct susp_sys_user_entry *sue;
struct susp_iterator *iter;
if ( info->src->read_block(info->src, block, buffer) < 0 ) {
info->error = LIBISOFS_READ_FAILURE;
return -1;
}
/* record will be the "." directory entry for the root record */
record = (struct ecma119_dir_record *)buffer;
/*
* TODO
* SUSP specification claims that for CD-ROM XA the SP entry
* is not at position BP 1, but at BP 15. Is that used?
* In that case, we need to set info->len_skp to 15!!
*/
iter = susp_iter_new(info, record);
/* first entry must be an SP system use entry */
sue = susp_iter_next(iter);
if (!sue && info->error) {
susp_iter_free(iter);
return -1;
} else if (!sue || !SUSP_SIG(sue, 'S', 'P') ) {
iso_msg_debug("SUSP/RR is not being used.");
susp_iter_free(iter);
return 0;
}
/* it is a SP system use entry */
if ( sue->version[0] != 1 || sue->data.SP.be[0] != 0xBE
|| sue->data.SP.ef[0] != 0xEF) {
iso_msg_sorry(LIBISO_SUSP_WRONG, "SUSP SP system use entry seems to "
"be wrong. Ignoring Rock Ridge Extensions.");
susp_iter_free(iter);
return 0;
}
iso_msg_debug("SUSP/RR is being used.");
/*
* The LEN_SKP field, defined in IEEE 1281, SUSP. 5.3, specifies the
* number of bytes to be skipped within each System Use field.
* I think this will be always 0, but given that support this standard
* features is easy...
*/
info->len_skp = sue->data.SP.len_skp[0];
/*
* Ok, now search for ER entry.
* Just notice that the attributes for root dir are read in
* iso_read_dir
*
* TODO if several ER are present, we need to identify the position of
* what refers to RR, and then look for corresponding ES entry in
* each directory record. I have not implemented this (it's not used,
* no?), but if we finally need it, it can be easily implemented in
* the iterator, transparently for the rest of the code.
*/
while ( (sue = susp_iter_next(iter)) ) {
/* ignore entries from different version */
if (sue->version[0] != 1)
continue;
if (SUSP_SIG(sue, 'E', 'R')) {
if (info->rr) {
iso_msg_warn(LIBISO_SUSP_MULTIPLE_ER,
"More than one ER has found. This is not supported.\n"
"It will be ignored, but can cause problems. "
"Please notify us about this.\n");
}
/*
* it seems that Rock Ridge can be identified with any
* of the following
*/
if ( sue->data.ER.len_id[0] == 10 &&
!strncmp((char*)sue->data.ER.ext_id, "RRIP_1991A", 10) ) {
iso_msg_debug("Suitable Rock Ridge ER found. Version 1.10.");
info->rr = RR_EXT_110;
} else if ( ( sue->data.ER.len_id[0] == 10 &&
!strncmp((char*)sue->data.ER.ext_id, "IEEE_P1282", 10) )
|| ( sue->data.ER.len_id[0] == 9 &&
!strncmp((char*)sue->data.ER.ext_id, "IEEE_1282", 9) ) ) {
iso_msg_debug("Suitable Rock Ridge ER found. Version 1.12.");
info->rr = RR_EXT_112;
//TODO check also version?
} else {
iso_msg_warn(LIBISO_SUSP_MULTIPLE_ER,
"Not Rock Ridge ER found.\n"
"That will be ignored, but can cause problems in "
"image reading. Please notify us about this");
}
}
}
susp_iter_free(iter);
if (info->error)
return -1;
return 0;
}
/**
 * Read the primary volume descriptor stored in \p block and build a volset,
 * holding a single volume, from its contents.
 *
 * Besides the identifier strings of the volume, this stores the volume
 * space size in *info->size, detects Rock Ridge through the root directory
 * (info->rr, info->hasRR) and records the block of the ISO root directory
 * in info->iso_root_block.
 *
 * @return the new volset, or NULL on failure, in which case info->error
 *         has been set here or by read_root_susp_entries().
 */
static struct iso_volset *
read_pvm(struct iso_read_info *info, uint32_t block)
{
    unsigned char raw[BLOCK_SIZE];
    struct ecma119_pri_vol_desc *pvd;
    struct ecma119_dir_record *root_record;
    struct iso_volume *vol;
    struct iso_volset *set;
    char *set_id;
    int looks_valid;

    if ( info->src->read_block(info->src, block, raw) < 0 ) {
        info->error = LIBISOFS_READ_FAILURE;
        return NULL;
    }

    pvd = (struct ecma119_pri_vol_desc *)raw;

    /* sanity checks: descriptor type, magic string and both versions */
    looks_valid = pvd->vol_desc_type[0] == 1
               && !strncmp((char*)pvd->std_identifier, "CD001", 5)
               && pvd->vol_desc_version[0] == 1
               && pvd->file_structure_version[0] == 1;
    if (!looks_valid) {
        iso_msg_fatal(LIBISO_WRONG_IMG, "Wrong PVM. Maybe this is a damaged "
                      "image, or it's not an ISO-9660 image.\n");
        info->error = LIBISOFS_WRONG_PVM;
        return NULL;
    }

    vol = iso_volume_new(NULL, NULL, NULL);

    /* copy the identifier strings out of the descriptor */
    vol->volume_id = strcopy((char*)pvd->volume_id, 32);
    vol->publisher_id = strcopy((char*)pvd->publisher_id, 128);
    vol->data_preparer_id = strcopy((char*)pvd->data_prep_id, 128);
    vol->system_id = strcopy((char*)pvd->system_id, 32);
    vol->application_id = strcopy((char*)pvd->application_id, 128);
    vol->copyright_file_id = strcopy((char*)pvd->copyright_file_id, 37);
    vol->abstract_file_id = strcopy((char*)pvd->abstract_file_id, 37);
    vol->biblio_file_id = strcopy((char*)pvd->bibliographic_file_id, 37);

    set_id = strcopy((char*)pvd->vol_set_id, 128);

    *info->size = iso_read_bb(pvd->vol_space_size, 4, NULL);

    /* the temporary copy of the identifier is released right after use */
    set = iso_volset_new(vol, set_id);
    free(set_id);

    /*
     * TODO
     * The way the volset / volume difference is handled is not nice.
     * While theoretically right (a volset can contain several volumes), in
     * practice this never seems to happen. The current implementation, with
     * the volume array in volset, makes things unnecessarily harder and
     * could be refactored. The volume set size of the descriptor
     * (vol_set_size) is not stored in the volset for now.
     */

    root_record = (struct ecma119_dir_record *)pvd->root_dir_record;

    /*
     * Check if RR is being used. The callee returns != 0 on error, and
     * leaves in info whether RR is being used.
     */
    if ( read_root_susp_entries(info, vol->root,
                                iso_read_bb(root_record->block, 4, NULL)) ) {
        /* error, cleanup and return */
        iso_volset_free(set);
        return NULL;
    }

    /* remember whether RR extensions are present */
    if (info->rr) {
        info->hasRR = 1;
    } else {
        info->hasRR = 0;
    }

    info->iso_root_block = iso_read_bb(root_record->block, 4, NULL);

    /*
     * The PVM has more things of interest that have no member in the
     * volume struct, such as the creation date. In a multisession disc we
     * could keep the creation date and update the modification date,
     * for example.
     */

    return set;
}
/**
 * Read an ISO image from \p src and build the volset that describes it.
 *
 * The primary volume descriptor is expected 16 blocks after opts->block.
 * The descriptors that follow it are scanned, up to the volume set
 * terminator, looking for a Joliet supplementary volume descriptor. Then
 * one directory tree is read: Rock Ridge / plain ISO-9660 or Joliet,
 * depending on what the image offers and on opts->norock,
 * opts->preferjoliet and opts->nojoliet.
 *
 * On success opts->hasRR and opts->hasJoliet tell which extensions the
 * image has, and opts->size receives the volume space size read from the
 * primary volume descriptor. On failure opts->error receives the error
 * code recorded while reading.
 *
 * @return the volset read, or NULL on failure.
 */
struct iso_volset *
iso_volset_read(struct data_source *src, struct ecma119_read_opts *opts)
{
    struct iso_read_info info;
    struct iso_volset *volset;
    uint32_t block, root_dir_block;
    unsigned char buffer[BLOCK_SIZE];

    assert(src && opts);

    /*
     * Start from an all-zero state, so that no member is ever read
     * uninitialized. In particular hasJoliet is only assigned further down
     * when a Joliet descriptor is actually found.
     */
    memset(&info, 0, sizeof(info));

    /* fill info with suitable values */
    info.error = LIBISOFS_READ_OK;
    info.src = src;
    info.rr = RR_EXT_NO;
    info.len_skp = 0;
    info.ino = 0;
    info.hasJoliet = 0;
    info.norock = opts->norock;
    info.uid = opts->uid;
    info.gid = opts->gid;
    info.mode = opts->mode & ~S_IFMT;
    info.size = &opts->size;
    root_dir_block = 0;

    /* read primary volume description */
    volset = read_pvm(&info, opts->block + 16);

    if (volset == NULL) {
        opts->error = info.error;
        return NULL;
    }

    block = opts->block + 17;
    do {
        if ( info.src->read_block(info.src, block, buffer) < 0 ) {
            info.error = LIBISOFS_READ_FAILURE;
            /* report the failure to the caller, as done for the PVM */
            opts->error = info.error;
            iso_volset_free(volset);
            return NULL;
        }
        switch (buffer[0]) {
        case 0:
            /* boot record */
            //TODO handle el-torito
            break;
        case 2:
            /* supplementary volume descriptor */
            {
                struct ecma119_sup_vol_desc *sup;
                struct ecma119_dir_record *root;

                sup = (struct ecma119_sup_vol_desc*)buffer;
                if (sup->esc_sequences[0] == 0x25 &&
                    sup->esc_sequences[1] == 0x2F &&
                    (sup->esc_sequences[2] == 0x40 ||
                     sup->esc_sequences[2] == 0x43 ||
                     sup->esc_sequences[2] == 0x45) ) {

                    /* it's a Joliet Sup. Vol. Desc. */
                    info.hasJoliet = 1;
                    root = (struct ecma119_dir_record*)sup->root_dir_record;
                    root_dir_block = iso_read_bb(root->block, 4, NULL);
                    //TODO maybe we can set the volume attribs from this
                    //descriptor
                } else {
                    iso_msg_hint(LIBISO_UNSUPPORTED_VD,
                                 "Not supported Sup. Vol. Desc found.");
                }
            }
            break;
        case 255:
            /*
             * volume set terminator
             * ignore, as it's checked in loop end condition
             */
            break;
        default:
            {
                char msg[32];
                /* bounded: the message can never run past msg */
                snprintf(msg, sizeof(msg), "Ignoring Volume descriptor %d.",
                         buffer[0]);
                iso_msg_hint(LIBISO_UNSUPPORTED_VD, msg);
            }
            break;
        }
        block++;
    } while (buffer[0] != 255);

    opts->hasRR = info.hasRR;
    opts->hasJoliet = info.hasJoliet;

    /* user doesn't want to read RR extensions */
    if (info.norock)
        info.rr = RR_EXT_NO;

    /* select what tree to read */
    if (info.rr) {
        /* RR extensions are available */
        if (opts->preferjoliet && info.hasJoliet) {
            /* if user prefers joliet, that is used */
            iso_msg_debug("Reading Joliet extensions.");
            info.get_name = ucs2str;
            info.rr = RR_EXT_NO;
            /* root_dir_block already contains root for joliet */
        } else {
            /* RR will be used */
            iso_msg_debug("Reading Rock Ridge extensions.");
            root_dir_block = info.iso_root_block;
            info.get_name = strcopy;
        }
    } else {
        /* RR extensions are not available */
        if (info.hasJoliet && !opts->nojoliet) {
            /* joliet will be used */
            iso_msg_debug("Reading Joliet extensions.");
            info.get_name = ucs2str;
            /* root_dir_block already contains root for joliet */
        } else {
            /* default to plain iso */
            iso_msg_debug("Reading plain ISO-9660 tree.");
            root_dir_block = info.iso_root_block;
            info.get_name = strcopy;
        }
    }

    /* Read the ISO/RR or Joliet tree */
    if ( iso_read_dir(&info, volset->volume[0]->root, root_dir_block) ) {
        /* error: report it, cleanup and return */
        opts->error = info.error;
        iso_volset_free(volset);
        return NULL;
    }

    // TODO merge tree info

    return volset;
}