/*
 *   (C) Copyright IBM Corp. 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: LVM2 Plugin
 * File: evms2/engine/plugins/lvm2/containers.c
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>
#include "lvm2.h"

/**
 * Translate between LVM2 VG names and EVMS container names.
 **/

void vg_name_to_container_name(char *vg_name,
			       char *container_name,
			       storage_container_t *disk_group)
{
	LOG_ENTRY();

	/* A container that belongs to a disk group carries the group's name
	 * as its leading path component; otherwise the name starts directly
	 * with the LVM2 directory. snprintf() is bounded by EVMS_NAME_SIZE,
	 * so an over-long result is truncated rather than overflowing.
	 */
	if (!disk_group) {
		snprintf(container_name, EVMS_NAME_SIZE, "%s/%s",
			 LVM2_DEV_DIRECTORY, vg_name);
	} else {
		snprintf(container_name, EVMS_NAME_SIZE, "%s/%s/%s",
			 disk_group->name, LVM2_DEV_DIRECTORY, vg_name);
	}

	LOG_EXIT_VOID();
}

/**
 * container_name_to_vg_name
 *
 * Extract the VG name from an EVMS container name of the form
 * "[<disk-group>/]<LVM2_DEV_DIRECTORY>/<vg-name>".
 *
 * The LVM2 directory is only accepted when it is a complete path component
 * (at the start of the name or right after a '/') that is followed by a '/'.
 * This keeps a disk-group name that merely begins with the directory string
 * from being mistaken for the directory, and guarantees that skipping the
 * separator never steps past the end of the string.
 *
 * @container_name: NUL-terminated EVMS container name.
 * @vg_name:        Output buffer of at least EVMS_NAME_SIZE bytes. On
 *                  success it holds a NUL-terminated VG name (at most
 *                  EVMS_NAME_SIZE - 1 characters). Untouched on failure.
 *
 * Returns 0 on success, EINVAL if no valid LVM2 directory component is found.
 **/
int container_name_to_vg_name(char *container_name, char *vg_name)
{
	size_t dir_len = strlen(LVM2_DEV_DIRECTORY);
	char *ptr = container_name;
	int rc = EINVAL;

	LOG_ENTRY();

	/* Find the "lvm2/" component and skip over it. */
	while ((ptr = strstr(ptr, LVM2_DEV_DIRECTORY)) != NULL) {
		if ((ptr == container_name || ptr[-1] == '/') &&
		    ptr[dir_len] == '/') {
			/* snprintf() always terminates the destination,
			 * unlike strncpy().
			 */
			snprintf(vg_name, EVMS_NAME_SIZE, "%s",
				 ptr + dir_len + 1);
			rc = 0;
			break;
		}
		/* Not a whole component; keep looking further along. */
		ptr++;
	}

	if (rc) {
		LOG_ERROR("Invalid container name: %s\n", container_name);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * Increment and decrement the size of the container by the total size of
 * the data extents on the specified PV object.
 **/

static void increment_container_size(storage_container_t *container,
				     storage_object_t *object)
{
	pv_data_t *pv = object->consuming_private_data;
	container_data_t *cd = container->private_data;

	LOG_ENTRY();

	/* The PV contributes pe_count extents of pe_size each. */
	container->size = container->size + pv->pe_count * cd->pe_size;

	LOG_EXIT_VOID();
}

static void decrement_container_size(storage_container_t *container,
				     storage_object_t *object)
{
	pv_data_t *pv = object->consuming_private_data;
	container_data_t *cd = container->private_data;

	LOG_ENTRY();

	/* Take back the pe_count extents of pe_size each that this PV
	 * contributed.
	 */
	container->size = container->size - pv->pe_count * cd->pe_size;

	LOG_EXIT_VOID();
}

/**
 * add_object_to_container
 *
 * Link a PV object into the container's consumed-objects list and grow the
 * container's size by the PV's data extents. The object is placed in front
 * of the first list entry whose pv_index is larger than its own, which keeps
 * the list ordered by pv_index.
 **/
void add_object_to_container(storage_object_t *object,
			     storage_container_t *container)
{
	pv_data_t *new_pv = object->consuming_private_data;
	pv_data_t *cur_pv;
	storage_object_t *cur_obj;
	list_element_t pos;
	insert_flags_t where;

	LOG_ENTRY();
	LOG_DEBUG("Adding object %s to container %s.\n",
		  object->name, container->name);

	/* Stop at the first existing PV with a higher index. If there is
	 * none, the insert below is done "after" wherever the iterator was
	 * left when the list ran out.
	 */
	where = INSERT_AFTER;
	LIST_FOR_EACH(container->objects_consumed, pos, cur_obj) {
		cur_pv = cur_obj->consuming_private_data;
		if (cur_pv->pv_index > new_pv->pv_index) {
			where = INSERT_BEFORE;
			break;
		}
	}

	object->consuming_container = container;
	EngFncs->insert_thing(container->objects_consumed, object, where, pos);
	increment_container_size(container, object);

	LOG_EXIT_VOID();
}

/**
 * remove_object_from_container
 *
 * Unlink a PV object from the container's consumed-objects list, shrink the
 * container's size by the PV's data extents, and clear the object's
 * back-pointer to the container.
 **/
void remove_object_from_container(storage_object_t *object,
				  storage_container_t *container)
{
	LOG_ENTRY();
	LOG_DEBUG("Removing object %s from container %s.\n",
		  object->name, container->name);

	EngFncs->remove_thing(container->objects_consumed, object);

	/* The size adjustment only needs the object's PV private data and
	 * the container passed in, not the back-pointer cleared below.
	 */
	decrement_container_size(container, object);
	object->consuming_container = NULL;

	LOG_EXIT_VOID();
}

/**
 * allocate_container
 *
 * Allocate a container and all necessary private data. Create an empty
 * freespace region and add it to the produced-objects list.
 **/
static storage_container_t *allocate_container(char *name,
					       char *uuid,
					       unsigned long sequence,
					       unsigned long pe_size,
					       unsigned long max_lvs,
					       unsigned long max_pvs,
					       unsigned long vg_flags)
{
	storage_container_t *container = NULL;
	container_data_t *c_data;
	int rc;

	LOG_ENTRY();

	rc = EngFncs->allocate_container(name, &container);
	if (rc) {
		LOG_ERROR("Error allocating container %s.\n", name);
		goto out;
	}

	c_data = EngFncs->engine_alloc(sizeof(*c_data));
	if (!c_data) {
		LOG_ERROR("Error allocating private data for container %s.\n",
			  name);
		EngFncs->free_container(container);
		container = NULL;
		goto out;
	}

	/* Initialize the container. */
	container->plugin = my_plugin_record;
	container->private_data = c_data;

	/* Initialize the container's private-data. */
	c_data->container = container;
	memcpy(c_data->uuid, uuid, LVM2_UUID_LEN);
	c_data->sequence = sequence;
	c_data->pe_size = pe_size;
	c_data->max_lvs = max_lvs;
	c_data->max_pvs = max_pvs;
	c_data->flags = vg_flags;

	/* Create the freespace region for the container. */
	rc = allocate_freespace_region(container);
	if (rc) {
		EngFncs->engine_free(c_data);
		EngFncs->free_container(container);
		container = NULL;
		goto out;
	}

	/* Add the new container to the global list. */
	EngFncs->insert_thing(lvm2_containers, container, INSERT_AFTER, NULL);

	LOG_DETAILS("Allocated container %s\n", name);

out:
	LOG_EXIT_PTR(container);
	return container;
}

/**
 * deallocate_container
 *
 * Tear down a container: take it off the global lvm2_containers list,
 * detach and free its freespace region (if it has one), free its private
 * data, and finally release the container itself. A NULL container is
 * accepted and ignored.
 **/
void deallocate_container(storage_container_t *container)
{
	storage_object_t *free_region;
	container_data_t *priv;

	LOG_ENTRY();

	if (!container) {
		LOG_EXIT_VOID();
		return;
	}

	LOG_DETAILS("Deallocating container %s.\n", container->name);

	EngFncs->remove_thing(lvm2_containers, container);

	free_region = get_freespace_region(container->objects_produced);
	if (free_region) {
		remove_region_from_container(free_region);
		deallocate_region(free_region);
	}

	priv = container->private_data;
	if (priv) {
		EngFncs->engine_free(priv);
	}

	EngFncs->free_container(container);

	LOG_EXIT_VOID();
}

/**
 * create_new_container
 *
 * Build a brand-new, empty container: derive the EVMS container name from
 * the VG name (and optional disk group), generate a fresh UUID, and allocate
 * the container with the given extent size. The new container starts with
 * sequence number 0, no LV/PV limits, and the resizeable/read/write flags.
 * No PVs are added here.
 *
 * Returns the container, or NULL if UUID generation or allocation failed.
 **/
storage_container_t *create_new_container(u_int64_t extent_size,
					  char *vg_name,
					  storage_container_t *disk_group)
{
	char name_buf[EVMS_NAME_SIZE+1];
	char uuid_buf[LVM2_UUID_LEN+1];
	storage_container_t *new_container = NULL;
	unsigned long initial_flags;
	int rc;

	LOG_ENTRY();

	initial_flags = LVM2_CONTAINER_FLAG_RESIZEABLE |
			LVM2_CONTAINER_FLAG_READ |
			LVM2_CONTAINER_FLAG_WRITE;

	/* Convert the name first, then generate the UUID. */
	vg_name_to_container_name(vg_name, name_buf, disk_group);
	rc = create_new_uuid(uuid_buf);
	if (!rc) {
		new_container = allocate_container(name_buf, uuid_buf, 0,
						   extent_size, 0, 0,
						   initial_flags);
	}

	LOG_EXIT_PTR(new_container);
	return new_container;
}


/**
 * Container discovery routines.
 **/


/**
 * create_container_for_pv
 *
 * Create a container based on the information in this PV's VGDA tree.
 *
 * The key of the tree's first node is used as the VG name. The sequence
 * number, status flags, extent size, and LV/PV limits are read from the
 * "seqno", "status", "extent_size", "max_lv" and "max_pv" entries. If any of
 * those entries is missing, the problem is logged and no container is
 * created.
 *
 * On success the new container's private data points at this PV's VGDA tree
 * (the pointer is shared, the tree is not duplicated).
 *
 * @pv_data: PV whose VGDA tree describes the volume group.
 * @vg_uuid: Unformatted VG UUID to record in the container.
 *
 * Returns the new container, or NULL on a parse or allocation error.
 **/
static storage_container_t *create_container_for_pv(pv_data_t *pv_data,
						    char *vg_uuid)
{
	storage_container_t *container = NULL;
	container_data_t *c_data;
	key_value_t *vgda_tree = pv_data->vgda_tree;
	key_value_t *node;
	char *vg_name, container_name[EVMS_NAME_SIZE];
	char *key;
	unsigned long sequence, pe_size, max_lvs, max_pvs, vg_flags = 0;

	LOG_ENTRY();

	/* The key of the first node in the tree is the VG name. */
	vg_name = vgda_tree->key;
	vg_name_to_container_name(vg_name, container_name,
				  pv_data->object->disk_group);

	/* Find the sequence-number, status flags,
	 * pe-size, and max lvs and pvs.
	 */

	key = "seqno";
	node = find_key(get_section(vgda_tree), key);
	if (!node) {
		goto parse_error;
	}
	sequence = read_int32(node);

	key = "status";
	node = find_key(get_section(vgda_tree), key);
	if (!node) {
		goto parse_error;
	}
	read_flags(node, VG_FLAGS, &vg_flags);

	key = "extent_size";
	node = find_key(get_section(vgda_tree), key);
	if (!node) {
		goto parse_error;
	}
	pe_size = read_int64(node);

	key = "max_lv";
	node = find_key(get_section(vgda_tree), key);
	if (!node) {
		goto parse_error;
	}
	max_lvs = read_int32(node);

	key = "max_pv";
	node = find_key(get_section(vgda_tree), key);
	if (!node) {
		goto parse_error;
	}
	max_pvs = read_int32(node);

	container = allocate_container(container_name, vg_uuid, sequence,
				       pe_size, max_lvs, max_pvs, vg_flags);
	if (container) {
		/* Copy this PV's VGDA tree pointer to the new container. */
		c_data = container->private_data;
		c_data->vgda_tree = vgda_tree;
	}
	goto out;

parse_error:
	/* Report which entry was missing instead of failing silently. */
	LOG_ERROR("Parse error finding \"%s\" entry in VGDA for "
		  "container %s!\n", key, container_name);

out:
	LOG_EXIT_PTR(container);
	return container;
}

/**
 * find_container_by_uuid
 *
 * Walk the global lvm2_containers list and return the first container whose
 * stored UUID matches the first LVM2_UUID_LEN bytes of the given UUID.
 **/
static storage_container_t *find_container_by_uuid(char *uuid)
{
	storage_container_t *candidate = NULL;
	container_data_t *priv;
	list_element_t pos;

	LOG_ENTRY();
	LOG_DETAILS("Searching for container with UUID %s\n", uuid);

	/* The loop variable doubles as the result. When nothing matches,
	 * the caller gets whatever LIST_FOR_EACH leaves in it at the end of
	 * the list -- presumably NULL; confirm against the macro.
	 */
	LIST_FOR_EACH(lvm2_containers, pos, candidate) {
		priv = candidate->private_data;
		if (memcmp(priv->uuid, uuid, LVM2_UUID_LEN) == 0) {
			break;
		}
	}

	LOG_EXIT_PTR(candidate);
	return candidate;
}

/**
 * find_container_for_pv
 *
 * Read the VG UUID from the "id" entry of this PV's VGDA tree and return the
 * container carrying that UUID. If no such container exists yet, one is
 * created from this PV's VGDA. Returns NULL if the "id" entry is missing or
 * the container could not be created.
 **/
static storage_container_t *find_container_for_pv(pv_data_t *pv_data)
{
	storage_container_t *found = NULL;
	key_value_t *id_node;
	char raw_uuid[LVM2_UUID_LEN+1];

	LOG_ENTRY();

	id_node = find_key(get_section(pv_data->vgda_tree), "id");
	if (id_node) {
		/* Convert the textual UUID from the VGDA before using it
		 * for the lookup.
		 */
		unformat_uuid(id_node->value.string, raw_uuid);

		found = find_container_by_uuid(raw_uuid);
		if (!found) {
			found = create_container_for_pv(pv_data, raw_uuid);
		}
	} else {
		LOG_ERROR("Parse error finding \"id\" entry in VGDA!\n");
	}

	LOG_EXIT_PTR(found);
	return found;
}

/**
 * add_pv_to_container
 *
 * Add a discovered PV to its container. The "seqno" entry in the PV's VGDA
 * is compared with the container's sequence number; a mismatch is logged
 * but the PV is still added. Returns EINVAL (and adds nothing) if the VGDA
 * has no "seqno" entry, 0 otherwise.
 **/
static int add_pv_to_container(pv_data_t *pv_data,
			       storage_container_t *container)
{
	container_data_t *priv = container->private_data;
	key_value_t *seq_node;
	int pv_seqno;
	int rc = 0;

	LOG_ENTRY();

	/* FIXME: What kind of verification do we need here before actually
	 *        adding this PV to the container?
	 *        - Need to check the sequence numbers from the VGDA match.
	 */

	seq_node = find_key(get_section(pv_data->vgda_tree), "seqno");
	if (seq_node) {
		pv_seqno = read_int32(seq_node);
		if (pv_seqno != priv->sequence) {
			LOG_ERROR("PV object %s has VGDA with sequence number not "
				  "equal to sequence number for container %s.\n",
				  pv_data->object->name, container->name);
			/* FIXME: What do we do here? Which sequence number
			 *        is correct?
			 */
		}

		add_object_to_container(pv_data->object, container);
	} else {
		/* Parse error! */
		rc = EINVAL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * discover_containers
 *
 * For every object on the PV list, look up (or create) the container its
 * VGDA belongs to and add the PV to it. PVs for which no container can be
 * found or created are skipped. A failure to add one PV does not stop the
 * scan.
 *
 * The return value is the result of the most recent add attempt, or 0 if
 * none was made.
 **/
int discover_containers(list_anchor_t pv_list)
{
	storage_container_t *owner;
	storage_object_t *pv_object;
	pv_data_t *pv;
	list_element_t pos;
	int rc = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(pv_list, pos, pv_object) {
		pv = pv_object->consuming_private_data;

		/* Find this PV's container, creating it if necessary. */
		owner = find_container_for_pv(pv);
		if (owner) {
			rc = add_pv_to_container(pv, owner);
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * count_pvs_in_vgda
 *
 * Return the number of entries in the "physical_volumes" section of the
 * VGDA tree, or 0 if the tree has no such section.
 **/
static int count_pvs_in_vgda(key_value_t *vgda_tree)
{
	key_value_t *pv_section;
	key_value_t *entry;
	int total = 0;

	LOG_ENTRY();

	pv_section = find_key(get_section(vgda_tree), "physical_volumes");
	if (pv_section) {
		/* Each sibling node in the section counts as one PV. */
		entry = get_section(pv_section);
		while (entry) {
			total++;
			entry = entry->next;
		}
	}

	LOG_EXIT_INT(total);
	return total;
}

/**
 * check_containers
 *
 * Validate every container on the global list after discovery. A container
 * whose consumed-object count differs from the PV count recorded in its
 * VGDA is flagged incomplete; otherwise the flag is cleared. On the final
 * discovery call, placeholder PVs are created for incomplete containers.
 * Containers without a VGDA tree are skipped. Always returns 0.
 **/
int check_containers(boolean final_call)
{
	storage_container_t *container;
	container_data_t *priv;
	list_element_t pos;
	int expected_pvs, found_pvs;
	int rc = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(lvm2_containers, pos, container) {
		LOG_DEBUG("Checking container %s.\n", container->name);
		priv = container->private_data;

		/* No vgda-tree means this is an engine rediscovery and the
		 * container was not discarded, so it must not be re-examined.
		 */
		if (!priv->vgda_tree) {
			LOG_DEBUG("Skipping container %s during rediscovery.\n",
				  container->name);
			continue;
		}

		/* Compare what the metadata says with what was found. */
		expected_pvs = count_pvs_in_vgda(priv->vgda_tree);
		found_pvs = EngFncs->list_count(container->objects_consumed);
		if (expected_pvs == found_pvs) {
			priv->flags &= ~LVM2_CONTAINER_FLAG_INCOMPLETE;
		} else {
			LOG_WARNING("Container %s is missing %u PVs.\n",
				    container->name, expected_pvs - found_pvs);
			priv->flags |= LVM2_CONTAINER_FLAG_INCOMPLETE;
		}

		/* FIXME: What else can we check? */

		/* Stand-ins for the PVs that weren't found are only
		 * created during final-call.
		 */
		if ((priv->flags & LVM2_CONTAINER_FLAG_INCOMPLETE) &&
		    final_call) {
			create_missing_pvs(container);
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * cleanup_containers
 *
 * End-of-discovery cleanup. For every container, delete the VGDA tree held
 * by each consumed PV (trees of missing PVs are not deleted), then clear
 * the PV's and the container's tree pointers. Always returns 0.
 **/
int cleanup_containers(void)
{
	storage_container_t *container;
	storage_object_t *pv_object;
	container_data_t *priv;
	pv_data_t *pv;
	list_element_t c_pos, pv_pos;
	int rc = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(lvm2_containers, c_pos, container) {
		priv = container->private_data;

		LIST_FOR_EACH(container->objects_consumed, pv_pos, pv_object) {
			pv = pv_object->consuming_private_data;
			if (!MISSING_PV(pv_object)) {
				delete_vgda_tree(pv->vgda_tree);
			}
			pv->vgda_tree = NULL;
		}

		/* The container only borrowed its tree from one of the PVs,
		 * so dropping the pointer is all that is needed here.
		 */
		priv->vgda_tree = NULL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/**
 * Metadata commit routines.
 **/


/**
 * commit_vgda_to_pv
 *
 * Find the next available space in this PV's metadata area and write the
 * VGDA buffer.
 *
 * This is done for every metadata area on the PV's metadata_areas list.
 * Each area's location record (vgda_offset, vgda_size, vgda_crc) is updated
 * before the buffer is written, so the values are available when the
 * MDA-header is written later.
 *
 * @object:      PV object to write to. Missing PVs are skipped successfully.
 * @vgda_buffer: Text VGDA to write.
 * @vgda_size:   Size of the VGDA in bytes.
 * @vgda_crc:    CRC of the VGDA, recorded in each location record.
 * @backup:      If TRUE, the VGDA stays at its current offset and is handed
 *               to the engine's save_metadata() instead of being written to
 *               the object.
 *
 * Returns 0 on success, ENOSPC if the VGDA does not fit in a metadata area,
 * or the error from the write. Processing stops at the first failing area.
 **/
static int commit_vgda_to_pv(storage_object_t *object,
			     char *vgda_buffer,
			     u_int32_t vgda_size,
			     u_int32_t vgda_crc,
			     boolean backup)
{
	pv_data_t *pv_data = object->consuming_private_data;
	metadata_location_t *location;
	list_element_t iter;
	u_int64_t old_size, new_size, new_offset, sector;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Writing VGDA metadata for container %s for object %s.\n",
		  object->consuming_container->name, object->name);

	/* Nothing can be written to a PV that isn't there. */
	if (MISSING_PV(object)) {
		LOG_DEBUG("Skipping missing object %s.\n", object->name);
		LOG_EXIT_INT(0);
		return 0;
	}

	/* Size of the VGDA buffer in sectors. */
	new_size = round_up(vgda_size, EVMS_VSECTOR_SIZE);
	new_size >>= EVMS_VSECTOR_SIZE_SHIFT;

	LIST_FOR_EACH(pv_data->metadata_areas, iter, location) {
		if (backup) {
			/* During backups, don't move the VGDA location. */
			new_offset = location->vgda_offset;
		} else {
			/* Find the start of the next sector
			 * following the current VGDA location.
			 */
			old_size = round_up(location->vgda_size,
					    EVMS_VSECTOR_SIZE);
			old_size >>= EVMS_VSECTOR_SIZE_SHIFT;
			new_offset = location->vgda_offset + old_size;

			/* Make sure the buffer won't overrun the metadata area. */
			if (new_offset + new_size > location->mda_size) {
				/* Wrap around to the start of the metadata area.
				 * NOTE(review): offset 1 rather than 0 --
				 * presumably sector 0 of the area holds the
				 * MDA-header; confirm.
				 */
				new_offset = 1;
				if (new_offset + new_size > location->mda_size) {
					/* If it still doesn't fit, we're screwed. */
					LOG_ERROR("VGDA buffer for container %s is too "
						  "large to fit in the metadata area "
						  "for PV %s. Buffer is %"PRIu64" "
						  "sectors, but only %"PRIu64" are "
						  "available.\n",
						  object->consuming_container->name,
						  object->name, new_size,
						  location->mda_size - new_offset);
					rc = ENOSPC;
					/* Any remaining metadata areas are left
					 * untouched.
					 */
					break;
				}
			}
		}

		/* Update the location info so we have it
		 * when we write the MDA-header.
		 */
		location->vgda_offset = new_offset;
		location->vgda_size = vgda_size;
		location->vgda_crc = vgda_crc;

		/* Write the VGDA buffer to disk. The target sector is the
		 * area's start plus the VGDA's offset within the area.
		 */
		sector = location->mda_start + location->vgda_offset;
		if (backup) {
			rc = EngFncs->save_metadata(object->consuming_container->name,
						    object->name, sector,
						    new_size, vgda_buffer);
		} else {
			rc = WRITE(object, sector, new_size, vgda_buffer);
		}
		if (rc) {
			LOG_ERROR("Error writing VGDA buffer for PV %s.\n",
				  object->name);
			break;
		}

		LOG_DEBUG("Wrote VGDA buffer for object %s, sector %"PRIu64".\n",
			  object->name, sector);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * commit_vgda
 *
 * Construct the text VGDA and write it to each of the PVs in the container.
 *
 * For a real commit (not a backup) the container's sequence number is
 * incremented before the VGDA text is generated. The VGDA's size and CRC
 * both include the trailing NUL. Writing stops at the first PV that fails.
 *
 * @container: Container whose metadata is written.
 * @backup:    TRUE to back up the metadata instead of committing it.
 *
 * Returns 0 on success, or the error from building or writing the VGDA.
 **/
static int commit_vgda(storage_container_t *container, boolean backup)
{
	container_data_t *c_data = container->private_data;
	char *vgda_buffer = NULL;
	u_int32_t vgda_size, vgda_crc;
	storage_object_t *object;
	list_element_t iter;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("%s VGDA for container %s.\n",
		  backup ? "Backing-up" : "Committing", container->name);

	/* Increment the container's sequence number. */
	if (!backup) {
		c_data->sequence++;
	}

	rc = prepare_vgda_buffer(container, &vgda_buffer);
	if (rc) {
		goto out;
	}

	/* The size of the VGDA includes the trailing NUL. */
	vgda_size = strlen(vgda_buffer) + 1;
	vgda_crc = lvm2_calc_crc(LVM2_INITIAL_CRC, vgda_buffer, vgda_size);

	/* Write the VGDA buffer to all the PVs in the container. */
	LIST_FOR_EACH(container->objects_consumed, iter, object) {
		rc = commit_vgda_to_pv(object, vgda_buffer,
				       vgda_size, vgda_crc, backup);
		if (rc) {
			break;
		}
	}

out:
	/* The buffer handed back through prepare_vgda_buffer()'s
	 * out-parameter is only needed while writing; release it on every
	 * path so it is not leaked on each commit.
	 * NOTE(review): assumes the buffer comes from the engine allocator
	 * and is left NULL when nothing was allocated -- confirm against
	 * prepare_vgda_buffer().
	 */
	if (vgda_buffer) {
		EngFncs->engine_free(vgda_buffer);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * commit_mda_headers
 *
 * Write the MDA header of every PV in the container so that it points at
 * the VGDA that was just written. Stops at, and returns, the first error.
 **/
static int commit_mda_headers(storage_container_t *container, boolean backup)
{
	storage_object_t *pv_object;
	list_element_t pos;
	int rc = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(container->objects_consumed, pos, pv_object) {
		rc = write_mda_header(pv_object, FALSE, FALSE, backup);
		if (rc != 0) {
			/* FIXME: Should we keep going or bail out? */
			break;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * commit_container_metadata
 *
 * Commit (or back up) all metadata for a container, in this order:
 *   1. new PVs,
 *   2. resized PVs (skipped for backups),
 *   3. the text VGDA on every PV,
 *   4. the MDA headers on every PV.
 * The first step that fails ends the sequence and its error is returned.
 **/
int commit_container_metadata(storage_container_t *container, boolean backup)
{
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("%s metadata for container %s.\n",
		  backup ? "Backing-up" : "Committing", container->name);

	/* Each stage runs only if every stage before it succeeded. */
	rc = commit_new_pvs(container, backup);

	if (!rc && !backup) {
		rc = commit_resized_pvs(container);
	}

	if (!rc) {
		rc = commit_vgda(container, backup);
	}

	if (!rc) {
		rc = commit_mda_headers(container, backup);
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/**
 * Expanding and shrinking containers
 **/


/**
 * can_add_object
 *
 * Check whether the object is big enough to become a PV in this container:
 * its size must be at least LVM2_MIN_PV_SIZE for the container's extent
 * size. Returns 0 if so, ENOSPC if the object is too small.
 **/
int can_add_object(storage_object_t *object, storage_container_t *container)
{
	container_data_t *priv = container->private_data;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if object %s can be added to container %s.\n",
		  object->name, container->name);

	rc = (object->size < LVM2_MIN_PV_SIZE(priv->pe_size)) ? ENOSPC : 0;
	if (rc) {
		LOG_DEBUG("Object %s is too small to add to container %s.\n",
			  object->name, container->name);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * can_remove_object
 *
 * Check whether the object may be taken out of its consuming container.
 * Returns EBUSY if it is the container's only object, or if any of its
 * parent objects is a data region; returns 0 otherwise.
 **/
int can_remove_object(storage_object_t *object)
{
	storage_container_t *owner = object->consuming_container;
	storage_object_t *parent;
	list_element_t pos;
	int pv_total;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Checking if object %s can be removed from container %s.\n",
		  object->name, owner->name);

	pv_total = EngFncs->list_count(owner->objects_consumed);
	if (pv_total <= 1) {
		LOG_DEBUG("Object %s is the only object in container %s. "
			  "Cannot be removed.\n", object->name, owner->name);
		rc = EBUSY;
	} else {
		/* One data region on top of the object is enough to pin it. */
		LIST_FOR_EACH(object->parent_objects, pos, parent) {
			if (parent->data_type == DATA_TYPE) {
				LOG_DEBUG("Object %s is used by region %s - cannot be "
					  "removed.\n", object->name, parent->name);
				rc = EBUSY;
				break;
			}
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * can_shrink_object
 *
 * The specified object can be shrunk if it has available extents at the end
 * of the object. Every PV must keep at least one extent, so a PV that is
 * completely unused can give up all but one of its extents, and a PV that
 * only has a single extent cannot shrink at all.
 *
 * @object:       PV object to examine.
 * @shrink_limit: On success, set to the amount of space that could be
 *                freed (extent count times the container's extent size).
 *                Not written on failure.
 *
 * Returns 0 if the PV can shrink by at least one extent, EBUSY otherwise.
 **/
int can_shrink_object(storage_object_t *object, u_int64_t *shrink_limit)
{
	container_data_t *c_data = object->consuming_container->private_data;
	pv_data_t *pv_data = object->consuming_private_data;
	u_int64_t i, unused_extents = 0;
	int rc = EBUSY;

	LOG_ENTRY();
	LOG_DEBUG("Checking if PV %s can be shrunk.\n", object->name);

	/* Work backwards from the end of the PE-map
	 * and see how many extents are unused.
	 */
	for (i = pv_data->pe_count; i > 0; i--) {
		if (!extent_is_available(pv_data->pe_map + (i-1))) {
			break;
		}
		unused_extents++;
	}

	/* Always need at least one extent per PV. */
	if (unused_extents && unused_extents == pv_data->pe_count) {
		unused_extents--;
	}

	/* Checked after the adjustment above, so a PV whose only extent is
	 * unused is reported as not shrinkable instead of "shrinkable by
	 * zero".
	 */
	if (unused_extents) {
		LOG_DEBUG("PV %s can shrink by %"PRIu64" extents.\n",
			  object->name, unused_extents);
		*shrink_limit = unused_extents * c_data->pe_size;
		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * add_object
 *
 * Turn the object into a new PV and add it to the container. The PV is
 * given the lowest pv_index not used by the container's existing PVs, the
 * freespace mappings are rebuilt, and the container is marked dirty.
 * The options argument is not used.
 *
 * Returns 0 on success, or the error from the size check, the PV creation,
 * or the freespace rebuild.
 **/
int add_object(storage_object_t *object,
	       storage_container_t *container,
	       option_array_t *options)
{
	storage_object_t *existing;
	pv_data_t *pv;
	list_element_t pos;
	u_int32_t next_index = 0;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Adding object %s to container %s.\n",
		  object->name, container->name);

	rc = can_add_object(object, container);
	if (!rc) {
		/* Create a new PV for this object. */
		rc = create_new_pv(object, container);
	}
	if (rc) {
		goto out;
	}

	/* Walk the consumed list counting up from zero; the first index
	 * that doesn't match the PV at that position is free.
	 */
	LIST_FOR_EACH(container->objects_consumed, pos, existing) {
		pv = existing->consuming_private_data;
		if (pv->pv_index != next_index) {
			break;
		}
		next_index++;
	}

	pv = object->consuming_private_data;
	pv->pv_index = next_index;

	/* Add the PV to the container. */
	add_object_to_container(object, container);

	/* Rebuild the freespace mappings to reflect the new space. */
	delete_freespace_mappings(container);
	rc = create_freespace_mappings(container);
	if (!rc) {
		container->flags |= SCFLAG_DIRTY;
	}
	/* FIXME: Any cleanup possible if the rebuild failed? */

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * remove_object
 *
 * Take a consumed object out of its container. After checking that removal
 * is allowed, the LVM2 metadata on the object is erased, the object is
 * unlinked from the container and its PV data freed, the remaining PV
 * indexes are adjusted, the freespace mappings are rebuilt, and the
 * container is marked dirty.
 *
 * Returns 0 on success, or the error from the removal check or the
 * freespace rebuild.
 **/
int remove_object(storage_object_t *object)
{
	storage_container_t *owner = object->consuming_container;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Removing object %s from container %s.\n",
		  object->name, owner->name);

	rc = can_remove_object(object);
	if (!rc) {
		/* Erase the LVM2 metadata from this PV. */
		erase_metadata(object);

		/* The freespace mappings have to go before the PV does. */
		delete_freespace_mappings(owner);

		/* Unlink the PV and free its private-data. */
		remove_object_from_container(object, owner);
		deallocate_pv_data(object);

		adjust_pv_indexes(owner);

		/* Rebuild the freespace mappings. */
		rc = create_freespace_mappings(owner);
		if (!rc) {
			owner->flags |= SCFLAG_DIRTY;
		}
		/* FIXME: Any cleanup possible if the rebuild failed? */
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * expand_object_in_container
 *
 * Tell the specified PV in this container to expand using the input-objects
 * and options. After it expands, update the PV's internal data to reflect
 * the increased space:
 *  - a new PE map is allocated with room for the added whole extents, the
 *    old entries are copied over, and LEs that pointed into the old map are
 *    re-pointed at the new one;
 *  - metadata areas located after the start of the data area are moved by
 *    the change in object size and their VGDA location is reset;
 *  - the freespace mappings are rebuilt.
 *
 * @container:       Container that must be consuming consumed_object.
 * @consumed_object: The PV object to expand.
 * @expand_object, @input_objects, @options: Passed through unchanged to the
 *                   consumed object's plugin expand routine.
 *
 * Returns 0 on success; EINVAL if the object isn't a PV of this container;
 * otherwise the error from the plugin's expand routine, the PE map
 * allocation, or the freespace rebuild.
 **/
int expand_object_in_container(storage_container_t *container,
			       storage_object_t *consumed_object,
			       storage_object_t *expand_object,
			       list_anchor_t input_objects,
			       option_array_t *options)
{
	pv_data_t *pv_data = consumed_object->consuming_private_data;
	container_data_t *c_data = container->private_data;
	physical_extent_t *old_pe_map, *new_pe_map;
	u_int64_t new_extents, old_pe_count, new_pe_count;
	u_int64_t i, old_size, new_size, delta_size;
	metadata_location_t *location;
	list_element_t iter;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Expanding object %s in container %s.\n",
		  consumed_object->name, container->name);

	/* Make sure the consumed-object is really a PV. */
	if (consumed_object->consuming_container != container) {
		LOG_ERROR("Attempt to expand object %s which isn't a PV in "
			  "container %s.\n", consumed_object->name,
			  container->name);
		rc = EINVAL;
		goto out;
	}

	/* Tell the PV object to expand. Save the old size. */
	old_size = consumed_object->size;
	rc = consumed_object->plugin->functions.plugin->expand(consumed_object,
							       expand_object,
							       input_objects,
							       options);
	if (rc) {
		LOG_ERROR("Error expanding object %s in container %s.\n",
			  consumed_object->name, container->name);
		goto out;
	}

	/* Calculate the number of new extents after the object expands.
	 * Only whole extents count; any remainder is ignored.
	 */
	new_size = consumed_object->size;
	delta_size = new_size - old_size;
	new_extents = delta_size / c_data->pe_size;

	/* Save the old PE map and allocate a new PE map. */
	old_pe_map = pv_data->pe_map;
	old_pe_count = pv_data->pe_count;
	new_pe_count = old_pe_count + new_extents;
	pv_data->pe_count = new_pe_count;
	pv_data->pe_map = NULL;

	rc = allocate_pe_map(pv_data);
	if (rc) {
		/* Put the PV back the way it was. */
		pv_data->pe_count = old_pe_count;
		pv_data->pe_map = old_pe_map;
		goto out;
	}
	new_pe_map = pv_data->pe_map;

	/* Copy the contents of the old PE map to the new PE map.
	 * For each old PE, point that PE's LE at the new PE.
	 */
	for (i = 0; i < old_pe_count; i++) {
		new_pe_map[i].le = old_pe_map[i].le;

		if (old_pe_map[i].le &&
		    old_pe_map[i].le->pe == &old_pe_map[i]) {
			old_pe_map[i].le->pe = &new_pe_map[i];
		}
	}

	/* For each metadata area that follows the PV's data area,
	 * move that metadata area back by the PV's change in size.
	 */
	LIST_FOR_EACH(pv_data->metadata_areas, iter, location) {
		if (location->mda_start > pv_data->pe_start) {
			location->mda_start += delta_size;
			location->vgda_offset = 0;
			location->vgda_size = 1;
		}
	}

	/* Rebuild the freespace mappings to reflect the new space. */
	delete_freespace_mappings(container);
	rc = create_freespace_mappings(container);

	/* The PV now uses the new map and the LEs that pointed into the old
	 * one were re-pointed above, so the old map is released whether or
	 * not the rebuild succeeded. Doing it before the error check keeps
	 * the failure path from leaking it.
	 */
	EngFncs->engine_free(old_pe_map);

	if (rc) {
		/* FIXME: Any cleanup possible? */
		goto out;
	}

	/* Update the container's size. */
	container->size += new_extents * c_data->pe_size;
	container->flags |= SCFLAG_DIRTY;
	pv_data->flags |= LVM2_PV_FLAG_RESIZED;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * shrink_object_in_container
 *
 * Tell the specified PV in this container to shrink using the input-objects
 * and options. After it shrinks, update the PV's internal data to reflect
 * the decreased space:
 *  - the number of removed extents is the size change rounded up to whole
 *    extents;
 *  - a smaller PE map is allocated, the surviving entries are copied over,
 *    and LEs that pointed into the old map are re-pointed at the new one;
 *  - metadata areas located after the start of the data area are moved by
 *    the change in object size and their VGDA location is reset;
 *  - the freespace mappings are rebuilt.
 *
 * @container:       Container that must be consuming consumed_object.
 * @consumed_object: The PV object to shrink.
 * @shrink_object, @input_objects, @options: Passed through unchanged to the
 *                   consumed object's plugin shrink routine.
 *
 * Returns 0 on success; EINVAL if the object isn't a PV of this container
 * or if the shrink would leave the PV without any extent; otherwise the
 * error from can_shrink_object(), the plugin's shrink routine, the PE map
 * allocation, or the freespace rebuild.
 **/
int shrink_object_in_container(storage_container_t *container,
			       storage_object_t *consumed_object,
			       storage_object_t *shrink_object,
			       list_anchor_t input_objects,
			       option_array_t *options)
{
	pv_data_t *pv_data = consumed_object->consuming_private_data;
	u_int64_t shrink_limit, old_size, new_size, delta_size;
	u_int64_t i, removed_extents, old_pe_count, new_pe_count;
	container_data_t *c_data = container->private_data;
	physical_extent_t *old_pe_map, *new_pe_map;
	metadata_location_t *location;
	list_element_t iter;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Shrinking object %s in container %s.\n",
		  consumed_object->name, container->name);

	/* Make sure the consumed-object is really a PV. */
	if (consumed_object->consuming_container != container) {
		LOG_ERROR("Attempt to shrink object %s which isn't a PV in "
			  "container %s.\n", consumed_object->name,
			  container->name);
		rc = EINVAL;
		goto out;
	}

	/* Check that we can allow this PV to shrink. */
	rc = can_shrink_object(consumed_object, &shrink_limit);
	if (rc) {
		LOG_ERROR("Cannot shrink object %s.\n", consumed_object->name);
		goto out;
	}

	/* Tell the PV object to shrink. Save the old size. */
	old_size = consumed_object->size;
	rc = consumed_object->plugin->functions.plugin->shrink(consumed_object,
							       shrink_object,
							       input_objects,
							       options);
	if (rc) {
		LOG_ERROR("Error shrinking object %s in container %s.\n",
			  consumed_object->name, container->name);
		goto out;
	}

	/* Calculate the number of extents to remove after the object shrunk.
	 * A partially-lost extent is unusable, so round up.
	 */
	new_size = consumed_object->size;
	delta_size = old_size - new_size;
	removed_extents = (delta_size / c_data->pe_size) +
			  ((delta_size % c_data->pe_size) ? 1 : 0);

	/* Every PV has to keep at least one extent. Bail out here rather
	 * than letting the unsigned subtraction below wrap around (or hit
	 * zero) and hand a bogus extent count to allocate_pe_map().
	 */
	old_pe_count = pv_data->pe_count;
	if (removed_extents >= old_pe_count) {
		LOG_ERROR("Object %s shrank by %"PRIu64" extents, but its PV "
			  "only has %"PRIu64" extents.\n",
			  consumed_object->name, removed_extents,
			  old_pe_count);
		rc = EINVAL;
		goto out;
	}

	/* Save the old PE map and allocate a new PE map. */
	old_pe_map = pv_data->pe_map;
	new_pe_count = old_pe_count - removed_extents;
	pv_data->pe_count = new_pe_count;
	pv_data->pe_map = NULL;

	rc = allocate_pe_map(pv_data);
	if (rc) {
		/* Put the PV back the way it was. */
		pv_data->pe_count = old_pe_count;
		pv_data->pe_map = old_pe_map;
		goto out;
	}
	new_pe_map = pv_data->pe_map;

	/* Copy the contents of the old PE map to the new PE map.
	 * For each old PE, point that PE's LE at the new PE.
	 */
	for (i = 0; i < new_pe_count; i++) {
		new_pe_map[i].le = old_pe_map[i].le;

		if (old_pe_map[i].le &&
		    old_pe_map[i].le->pe == &old_pe_map[i]) {
			old_pe_map[i].le->pe = &new_pe_map[i];
		}
	}

	/* For each metadata area that follows the PV's data area,
	 * move that metadata area forward by the PV's change in size.
	 */
	LIST_FOR_EACH(pv_data->metadata_areas, iter, location) {
		if (location->mda_start > pv_data->pe_start) {
			location->mda_start -= delta_size;
			location->vgda_offset = 0;
			location->vgda_size = 1;
		}
	}

	/* Rebuild the freespace mappings to reflect the reduced space. */
	delete_freespace_mappings(container);
	rc = create_freespace_mappings(container);

	/* The PV now uses the new map, so the old one is released whether
	 * or not the rebuild succeeded. Doing it before the error check
	 * keeps the failure path from leaking it.
	 */
	EngFncs->engine_free(old_pe_map);

	if (rc) {
		/* FIXME: Any cleanup possible? */
		goto out;
	}

	/* Update the container's size. */
	container->size -= removed_extents * c_data->pe_size;
	container->flags |= SCFLAG_DIRTY;
	pv_data->flags |= LVM2_PV_FLAG_RESIZED;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * max_consecutive_extents_in_container
 *
 * Return the largest value that max_consecutive_extents_on_object() reports
 * for any PV consumed by this container, or 0 if the container has no PVs.
 **/
u_int64_t max_consecutive_extents_in_container(storage_container_t *container)
{
	storage_object_t *pv_object;
	list_element_t pos;
	u_int64_t best = 0;
	u_int64_t run;

	LOG_ENTRY();
	LOG_DEBUG("Finding max number of consecutive PEs in container %s.\n",
		  container->name);

	LIST_FOR_EACH(container->objects_consumed, pos, pv_object) {
		run = max_consecutive_extents_on_object(pv_object);
		if (run > best) {
			best = run;
		}
	}

	LOG_EXIT_U64(best);
	return best;
}

