/*
 *   (C) Copyright IBM Corp. 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: LVM2 Plugin
 * File: evms2/engine/plugins/lvm2/objects.c
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>
#include "lvm2.h"

/**
 * find_pv_by_index
 *
 * Walk the objects consumed by the container and return the PV private data
 * whose pv_index matches the requested index. Returns NULL when no consumed
 * object carries that index.
 **/
pv_data_t *find_pv_by_index(storage_container_t *container, u_int32_t pv_index)
{
	storage_object_t *pv_object;
	pv_data_t *candidate;
	pv_data_t *match = NULL;
	list_element_t cursor;

	LOG_ENTRY();

	LIST_FOR_EACH(container->objects_consumed, cursor, pv_object) {
		candidate = pv_object->consuming_private_data;
		if (candidate->pv_index == pv_index) {
			/* First hit wins; stop scanning. */
			match = candidate;
			break;
		}
	}

	LOG_EXIT_PTR(match);
	return match;
}

/**
 * find_pv_by_name
 *
 * Walk the objects consumed by the container and return the first one whose
 * name matches the given string (compared over at most EVMS_NAME_SIZE
 * characters). Returns NULL if name is NULL or nothing matches.
 **/
storage_object_t *find_pv_by_name(storage_container_t *container, char *name)
{
	storage_object_t *candidate;
	storage_object_t *found = NULL;
	list_element_t cursor;

	LOG_ENTRY();

	if (!name) {
		/* Nothing to compare against. */
		goto done;
	}

	LIST_FOR_EACH(container->objects_consumed, cursor, candidate) {
		if (strncmp(candidate->name, name, EVMS_NAME_SIZE) == 0) {
			found = candidate;
			break;
		}
	}

done:
	LOG_EXIT_PTR(found);
	return found;
}

/**
 * adjust_pv_indexes
 *
 * Renumber the PVs in a container so that their pv-indexes are consecutive
 * and begin at zero, following the order of the consumed-objects list. This
 * is intended to be run after a PV has been removed. The indexes only matter
 * during discovery and commit, so rewriting them here is harmless.
 **/
void adjust_pv_indexes(storage_container_t *container)
{
	storage_object_t *pv_object;
	list_element_t cursor;
	u_int32_t next_index = 0;

	LOG_ENTRY();
	LOG_DEBUG("Adjusting PV indexes for container %s.\n", container->name);

	LIST_FOR_EACH(container->objects_consumed, cursor, pv_object) {
		pv_data_t *pv = pv_object->consuming_private_data;

		pv->pv_index = next_index;
		next_index++;
	}

	LOG_EXIT_VOID();
}

/**
 * allocate_pe_map
 *
 * Build the physical-extent map for a PV: one entry per extent, sized from
 * pv_data->pe_count. The pv_data must already be filled in by the caller.
 *
 * Returns 0 on success, or ENOMEM if the map cannot be allocated (in which
 * case pv_data->pe_map is left untouched).
 **/
int allocate_pe_map(pv_data_t *pv_data)
{
	physical_extent_t *extents;
	u_int64_t pe;
	int rc = 0;

	LOG_ENTRY();

	extents = EngFncs->engine_alloc(pv_data->pe_count * sizeof(*extents));
	if (!extents) {
		LOG_ERROR("Error allocating PE map for PV object %s.\n",
			  pv_data->object->name);
		rc = ENOMEM;
		goto out;
	}

	/* Each entry records its own position and its owning PV; neither
	 * value is expected to change afterwards.
	 */
	for (pe = 0; pe < pv_data->pe_count; pe++) {
		extents[pe].number = pe;
		extents[pe].pv_data = pv_data;
	}
	pv_data->pe_map = extents;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * deallocate_pe_map
 *
 * Release the PE map owned by this PV, if there is one, and clear the
 * pointer so the map cannot be freed twice.
 **/
static void deallocate_pe_map(pv_data_t *pv_data)
{
	physical_extent_t *extents = pv_data->pe_map;

	LOG_ENTRY();

	if (extents != NULL) {
		pv_data->pe_map = NULL;
		EngFncs->engine_free(extents);
	}

	LOG_EXIT_VOID();
}

/**
 * allocate_pv_data
 *
 * Create the consuming-private-data for a PV object, fill it in from the
 * supplied arguments, and build its PE map. On success the new structure is
 * attached to object->consuming_private_data.
 *
 * The metadata_locations list and vgda_tree are stored by reference, not
 * copied. On failure nothing is attached to the object and the caller still
 * owns metadata_locations.
 *
 * Returns 0 on success, ENOMEM if the private data cannot be allocated, or
 * the error from allocate_pe_map().
 **/
int allocate_pv_data(storage_object_t *object,
		     list_anchor_t metadata_locations,
		     key_value_t *vgda_tree,
		     char *uuid,
		     u_int64_t label_sector,
		     u_int32_t pv_index,
		     u_int64_t pe_start,
		     u_int64_t pe_count,
		     unsigned long flags)
{
	pv_data_t *pv;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Allocating PV private data for object %s.\n", object->name);

	pv = EngFncs->engine_alloc(sizeof(*pv));
	if (!pv) {
		LOG_ERROR("Error allocating PV private data for "
			  "object %s.\n", object->name);
		rc = ENOMEM;
		goto out;
	}

	/* Identity and metadata references. */
	pv->object = object;
	pv->vgda_tree = vgda_tree;
	pv->metadata_areas = metadata_locations;
	memcpy(pv->uuid, uuid, LVM2_UUID_LEN);

	/* On-disk layout and state. */
	pv->label_sector = label_sector;
	pv->pv_index = pv_index;
	pv->pe_start = pe_start;
	pv->pe_count = pe_count;
	pv->flags = flags;

	rc = allocate_pe_map(pv);
	if (rc) {
		EngFncs->engine_free(pv);
		goto out;
	}

	object->consuming_private_data = pv;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * deallocate_pv_data
 *
 * Free the memory for this PV's private-data: the PE map, every entry in the
 * metadata-areas list (and the list itself), the VGDA tree (unless the PV is
 * flagged as missing), and finally the pv_data structure. The object's
 * consuming_private_data pointer is cleared afterwards.
 *
 * This routine does not remove any items from the PV's parent-objects list,
 * and does not make any modifications to the PE-map before deleting it.
 * Calling it on an object with no private data is a no-op.
 **/
void deallocate_pv_data(storage_object_t *object)
{
	pv_data_t *pv_data = object->consuming_private_data;
	metadata_location_t *location;
	list_element_t iter1, iter2;

	LOG_ENTRY();

	if (pv_data) {
		LOG_DETAILS("Deallocating PV private data for "
			    "object %s.\n", object->name);

		deallocate_pe_map(pv_data);

		if (pv_data->metadata_areas) {
			LIST_FOR_EACH_SAFE(pv_data->metadata_areas,
					   iter1, iter2, location) {
				EngFncs->engine_free(location);
				/* remove_element() only unlinks the element
				 * (discover_pv_objects() re-inserts one after
				 * removing it), so using it here would orphan
				 * and leak the element. delete_element()
				 * unlinks and frees it.
				 */
				EngFncs->delete_element(iter1);
			}
			EngFncs->destroy_list(pv_data->metadata_areas);
		}

		/* The VGDA tree is not deleted for a missing PV.
		 * NOTE(review): presumably because a missing PV borrows the
		 * container's tree (see create_missing_pv) - confirm.
		 */
		if (pv_data->vgda_tree && !MISSING_PV(object)) {
			delete_vgda_tree(pv_data->vgda_tree);
		}

		EngFncs->engine_free(pv_data);
		object->consuming_private_data = NULL;
	}

	LOG_EXIT_VOID();
}

/**
 * discover_pv_objects
 *
 * Search the list of input objects for valid LVM2 PV metadata. Each object
 * for which read_pv_metadata() succeeds is moved from the input list to the
 * PV list; everything else stays on the input list.
 *
 * Objects whose data_type is not DATA_TYPE are skipped without being probed.
 *
 * Always returns 0. An object that fails the metadata read is simply left on
 * the input list; that is the normal outcome for non-PV objects and is not
 * reported as an error. (Previously the result of whichever object happened
 * to be probed last was returned, so the return value depended on list
 * order.)
 **/
int discover_pv_objects(list_anchor_t input_objects, list_anchor_t pv_list)
{
	storage_object_t *object;
	list_element_t itr1, itr2;
	int rc;

	LOG_ENTRY();

	LIST_FOR_EACH_SAFE(input_objects, itr1, itr2, object) {
		if (object->data_type != DATA_TYPE) {
			LOG_DEBUG("Skipping object %s - not DATA_TYPE.\n",
				  object->name);
			continue;
		}

		rc = read_pv_metadata(object);
		if (!rc) {
			/* Move the list element itself rather than
			 * allocating a new one on the PV list.
			 */
			EngFncs->remove_element(itr1);
			EngFncs->insert_element(pv_list, itr1,
						INSERT_AFTER, NULL);
		}
	}

	LOG_EXIT_INT(0);
	return 0;
}

/**
 * commit_new_pv
 *
 * Write the initial on-disk metadata for one PV object: an MDA-header
 * (skipped when doing a backup) followed by the PV-label/header. Stops at
 * the first failure and returns its error code; returns 0 on success.
 **/
static int commit_new_pv(storage_object_t *object, boolean backup)
{
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("%s PV object %s.\n",
		  backup ? "Backing-up" : "Committing new", object->name);

	/* The MDA-header is only written on a real commit. */
	if (!backup) {
		rc = write_mda_header(object, TRUE, FALSE, backup);
	}

	if (!rc) {
		rc = write_pv_label_and_header(object, backup);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * commit_new_pvs
 *
 * Write a PV-label, PV-header and initial MDA-header for every PV in the
 * container that is flagged as new (from container creates and expands).
 * When backup is set, every PV is processed regardless of its flags, and no
 * flags are modified.
 *
 * After a successful non-backup write, the NEW and RESIZED flags are cleared
 * on that PV. Processing stops at the first failure, whose error code is
 * returned; returns 0 if everything succeeded.
 **/
int commit_new_pvs(storage_container_t *container, boolean backup)
{
	storage_object_t *pv_object;
	list_element_t cursor;
	pv_data_t *pv;
	int rc = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(container->objects_consumed, cursor, pv_object) {
		pv = pv_object->consuming_private_data;

		if (!backup && !(pv->flags & LVM2_PV_FLAG_NEW)) {
			continue;
		}

		rc = commit_new_pv(pv_object, backup);
		if (rc) {
			/* Without the initial metadata on every new PV the
			 * commit cannot proceed.
			 */
			break;
		}

		if (!backup) {
			pv->flags &= ~(LVM2_PV_FLAG_NEW |
				       LVM2_PV_FLAG_RESIZED);
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * commit_resized_pv
 *
 * Rewrite the MDA-header and then the PV-label/header on this object.
 * Returns the first error encountered, or 0 on success.
 **/
static int commit_resized_pv(storage_object_t *object)
{
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Committing resized PV object %s.\n", object->name);

	rc = write_mda_header(object, TRUE, TRUE, FALSE);
	if (!rc) {
		/* Only touch the label once the MDA-header is on disk. */
		rc = write_pv_label_and_header(object, FALSE);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * commit_resized_pvs
 *
 * For every PV in the container flagged as resized, write out metadata that
 * reflects the PV's new size, then clear the RESIZED flag. Processing stops
 * at the first failure and that error code is returned; returns 0 otherwise.
 **/
int commit_resized_pvs(storage_container_t *container)
{
	storage_object_t *pv_object;
	list_element_t cursor;
	pv_data_t *pv;
	int rc = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(container->objects_consumed, cursor, pv_object) {
		pv = pv_object->consuming_private_data;

		if (!(pv->flags & LVM2_PV_FLAG_RESIZED)) {
			continue;
		}

		rc = commit_resized_pv(pv_object);
		if (rc) {
			/* FIXME: Bail out or keep going? */
			break;
		}

		pv->flags &= ~LVM2_PV_FLAG_RESIZED;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * create_new_pv
 *
 * Create a new PV for the specified object. The specified container will be
 * the eventual location of this PV, but this routine will not add the new
 * PV to this container; the container is only consulted for its PE size.
 *
 * The new PV gets a fresh UUID, a single metadata area placed directly after
 * the label-scan sectors, and a data area that starts after that metadata
 * area. It is flagged NEW and ALLOCATABLE.
 *
 * The pv_index is left blank (zero), and must be set before adding the new
 * PV to the container.
 *
 * Returns 0 on success, ENOMEM on allocation failure, or the error from
 * create_new_uuid() / allocate_pv_data(). On any failure, everything
 * allocated here is released and the object is left without PV data.
 *
 * NOTE(review): object->size is assumed to exceed pe_start and the
 * container's pe_size is assumed non-zero; neither is checked here.
 **/
int create_new_pv(storage_object_t *object, storage_container_t *container)
{
	container_data_t *c_data = container->private_data;
	list_anchor_t locations;
	metadata_location_t *location;
	char pv_uuid[LVM2_UUID_LEN+1];
	u_int64_t pe_start, pe_count;
	unsigned long flags = LVM2_PV_FLAG_NEW | LVM2_PV_FLAG_ALLOCATABLE;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Creating new PV for object %s.\n", object->name);

	/* Generate a new UUID for this PV. */
	rc = create_new_uuid(pv_uuid);
	if (rc) {
		goto out;
	}

	/* Calculate the start and size of the data area. */
	pe_start = LVM2_LABEL_SCAN_SECTORS + LVM2_DEFAULT_MDA_SIZE;
	pe_count = (object->size - pe_start) / c_data->pe_size;

	/* Create a new list for the metadata locations. */
	locations = EngFncs->allocate_list();
	if (!locations) {
		LOG_ERROR("Error allocating metadata locations list for "
			  "new PV %s.\n", object->name);
		rc = ENOMEM;
		goto out;
	}

	/* Create the entry for the metadata locations list. Set the initial
	 * vgda size to 1 so the commit code will put the first VGDA in the
	 * appropriate location in the metadata area.
	 */
	location = EngFncs->engine_alloc(sizeof(*location));
	if (!location) {
		LOG_ERROR("Error allocating metadata location entry for "
			  "new PV %s.\n", object->name);
		EngFncs->destroy_list(locations);
		rc = ENOMEM;
		goto out;
	}

	location->mda_start = LVM2_LABEL_SCAN_SECTORS;
	location->mda_size = LVM2_DEFAULT_MDA_SIZE;
	location->vgda_size = 1;
	EngFncs->insert_thing(locations, location, INSERT_BEFORE, NULL);

	/* Allocate the PV's private data. */
	rc = allocate_pv_data(object, locations, NULL, pv_uuid,
			      1, 0, pe_start, pe_count, flags);
	if (rc) {
		/* allocate_pv_data() does not take ownership of the
		 * locations list when it fails, so release the entry and
		 * the list here to avoid leaking them.
		 */
		EngFncs->engine_free(location);
		EngFncs->destroy_list(locations);
		goto out;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * extent_is_available
 *
 * A physical extent is unavailable if it is mapped by a logical extent whose
 * region has data_type DATA_TYPE, or if the PV it belongs to is flagged as
 * missing. In every other case it is available.
 **/
boolean extent_is_available(physical_extent_t *pe)
{
	/* No entry/exit logs. */
	if (pe->le &&
	    pe->le->le_map->r_map->r_data->region->data_type == DATA_TYPE) {
		/* In use by a data region. */
		return FALSE;
	}

	if (MISSING_PV(pe->pv_data->object)) {
		/* Extents on a missing PV can never be handed out. */
		return FALSE;
	}

	return TRUE;
}

/**
 * count_available_extents_in_pv
 *
 * Scan this PV's entire PE map and return how many extents
 * extent_is_available() reports as available.
 **/
u_int64_t count_available_extents_in_pv(storage_object_t *object)
{
	pv_data_t *pv = object->consuming_private_data;
	u_int64_t available = 0;
	u_int64_t pe;

	LOG_ENTRY();
	LOG_DEBUG("Counting free extents in object %s.\n", object->name);

	for (pe = 0; pe < pv->pe_count; pe++) {
		available += extent_is_available(pv->pe_map + pe) ? 1 : 0;
	}

	LOG_EXIT_U64(available);
	return available;
}

/**
 * count_available_extents_in_pvs
 *
 * Sum the available-extent counts of every PV object in the given list.
 **/
u_int64_t count_available_extents_in_pvs(list_anchor_t objects)
{
	storage_object_t *pv_object;
	list_element_t cursor;
	u_int64_t total = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(objects, cursor, pv_object) {
		u_int64_t in_this_pv;

		in_this_pv = count_available_extents_in_pv(pv_object);
		total += in_this_pv;
	}

	LOG_EXIT_U64(total);
	return total;
}

/**
 * count_available_pvs
 *
 * Return how many PVs in this object-list have at least one available
 * extent.
 **/
u_int32_t count_available_pvs(list_anchor_t objects)
{
	storage_object_t *pv_object;
	list_element_t cursor;
	u_int32_t usable = 0;

	LOG_ENTRY();

	LIST_FOR_EACH(objects, cursor, pv_object) {
		if (count_available_extents_in_pv(pv_object) != 0) {
			usable++;
		}
	}

	LOG_EXIT_INT(usable);
	return usable;
}

/**
 * pv_names_to_list
 *
 * Build a new engine-list of PV objects from a value_list of PV names. Each
 * name is looked up in the container's consumed objects; names that do not
 * resolve are silently dropped. If pv_names is NULL or empty, the
 * container's consumed-objects list is concatenated onto the new list
 * instead.
 *
 * Returns the new list, or NULL if the list could not be allocated.
 **/
list_anchor_t pv_names_to_list(value_list_t *pv_names,
			       storage_container_t *container)
{
	storage_object_t *pv_object;
	list_anchor_t result;
	u_int idx;

	LOG_ENTRY();

	result = EngFncs->allocate_list();
	if (result) {
		if (!pv_names || !pv_names->count) {
			/* No explicit selection: use every PV. */
			EngFncs->concatenate_lists(result,
						   container->objects_consumed);
		} else {
			for (idx = 0; idx < pv_names->count; idx++) {
				pv_object = find_pv_by_name(container,
						pv_names->value[idx].s);
				if (!pv_object) {
					continue;
				}
				EngFncs->insert_thing(result, pv_object,
						      INSERT_AFTER, NULL);
			}
		}
	}

	LOG_EXIT_PTR(result);
	return result;
}

/**
 * create_error_object
 *
 * Ask the "Error" plugin to create a stand-in object for a PV that could not
 * be found. The object is named "<container>/pv<index>", is created with
 * type "segment", and is sized to cover the PV's data-area offset plus all
 * of its extents (pe_start + pe_count * pe_size).
 *
 * Returns the new object, or NULL if an allocation fails, the Error plugin
 * cannot be located, the plugin's create call fails, or the plugin returns
 * no object. The temporary input/output lists and the option array are
 * released on every path.
 **/
static storage_object_t *create_error_object(storage_container_t *container,
					     u_int32_t pv_index,
					     u_int64_t pe_start,
					     u_int64_t pe_count)
{
	container_data_t *c_data = container->private_data;
	storage_object_t *object = NULL;
	plugin_record_t *error_plugin;
	list_anchor_t input, output;
	option_array_t *options;
	char name[EVMS_NAME_SIZE+1];
	int rc;

	LOG_ENTRY();

	snprintf(name, EVMS_NAME_SIZE, "%s/pv%u", container->name, pv_index);

	input = EngFncs->allocate_list();
	output = EngFncs->allocate_list();
	options = EngFncs->engine_alloc(sizeof(option_array_t) +
					sizeof(key_value_pair_t) * 3);
	if (!input || !output || !options) {
		goto out;
	}

	rc = EngFncs->get_plugin_by_name("Error", &error_plugin);
	if (rc) {
		goto out;
	}

	options->option[0].is_number_based = FALSE;
	options->option[0].name = "name";
	options->option[0].type = EVMS_Type_String;
	options->option[0].value.s = name;

	options->option[1].is_number_based = FALSE;
	options->option[1].name = "size";
	options->option[1].type = EVMS_Type_Unsigned_Int64;
	options->option[1].value.ui64 = pe_start + pe_count * c_data->pe_size;

	options->option[2].is_number_based = FALSE;
	options->option[2].name = "type";
	options->option[2].type = EVMS_Type_String;
	options->option[2].value.s = "segment";

	options->count = 3;

	rc = error_plugin->functions.plugin->create(input, options, output);
	if (rc) {
		goto out;
	}

	/* Guard against the plugin reporting success but producing no
	 * object, which would otherwise be dereferenced by the log call.
	 */
	object = EngFncs->first_thing(output, NULL);
	if (object) {
		LOG_DEBUG("Created error object %s.\n", object->name);
	}

out:
	/* The lists and option array are scratch space for the create call;
	 * free whichever ones were successfully allocated. Destroying the
	 * output list releases the list only, not the object it references.
	 */
	if (input) {
		EngFncs->destroy_list(input);
	}
	if (output) {
		EngFncs->destroy_list(output);
	}
	if (options) {
		EngFncs->engine_free(options);
	}

	LOG_EXIT_PTR(object);
	return object;
}

/**
 * create_missing_pv
 *
 * Create an error object based on the information in this PV metadata section.
 * The error object gets PV private data that shares the container's VGDA
 * tree, is flagged as MISSING, and is then added to the specified container.
 * The user is notified that the PV was not found.
 *
 * Returns 0 on success, the error from parse_pv_info() or
 * allocate_pv_data(), or ENOMEM if the locations list or the error object
 * cannot be created.
 **/
static int create_missing_pv(key_value_t *pv_entry,
			     storage_container_t *container)
{
	container_data_t *c_data = container->private_data;
	storage_object_t *object;
	pv_data_t *pv_data;
	u_int32_t pv_index;
	u_int64_t pe_start, pe_count;
	unsigned long pv_flags = 0;
	char pv_uuid[LVM2_UUID_LEN+1];
	list_anchor_t locations;
	int rc;

	LOG_ENTRY();

	/* Get the PV info from the VGDA. */
	rc = parse_pv_info(pv_entry, pv_uuid, &pv_index,
			   &pe_start, &pe_count, &pv_flags);
	if (rc) {
		goto out;
	}

	MESSAGE(_("The PV with index %u was not found when discovering "
		  "container %s. An \"error\" object will be created in "
		  "it's place. Any regions in this container that map to "
		  "this PV will return I/O errors if they attempt to read "
		  "or write to this PV. Regions that don't map to this PV "
		  "will work normally."),
		pv_index, container->name);

	/* Dummy list of metadata locations. */
	locations = EngFncs->allocate_list();
	if (!locations) {
		/* rc is still 0 from parse_pv_info(); report the failure
		 * instead of returning success with no PV created.
		 */
		LOG_ERROR("Error allocating metadata locations list for "
			  "missing PV %u in container %s.\n",
			  pv_index, container->name);
		rc = ENOMEM;
		goto out;
	}

	/* Create the error object. */
	object = create_error_object(container, pv_index, pe_start, pe_count);
	if (!object) {
		EngFncs->destroy_list(locations);
		rc = ENOMEM;
		goto out;
	}

	/* Create PV-data for the error object. */
	rc = allocate_pv_data(object, locations, c_data->vgda_tree, pv_uuid,
			      0, pv_index, pe_start, pe_count, pv_flags);
	if (rc) {
		/* FIXME: Cleanup? The error object itself is not released
		 * on this path.
		 */
		EngFncs->destroy_list(locations);
		goto out;
	}
	pv_data = object->consuming_private_data;
	pv_data->flags |= LVM2_PV_FLAG_MISSING;

	add_object_to_container(object, container);

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * create_missing_pvs
 *
 * Walk the "physical_volumes" section of the container's VGDA tree. For each
 * entry whose key parses as "pv<N>", check whether a PV with index N is
 * already part of the container; if not, create an error object to stand in
 * for it. Entries whose key does not parse are skipped, and a failure to
 * create an individual stand-in does not stop the walk or affect the return
 * value.
 *
 * Returns 0, or EINVAL if the metadata has no "physical_volumes" section.
 **/
int create_missing_pvs(storage_container_t *container)
{
	container_data_t *c_data = container->private_data;
	key_value_t *tree = c_data->vgda_tree;
	key_value_t *section, *entry;
	u_int32_t index;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Creating missing PVs for container %s.\n", container->name);

	section = find_key(get_section(tree), "physical_volumes");
	if (!section) {
		/* Parse error! */
		rc = EINVAL;
		goto out;
	}

	entry = get_section(section);
	while (entry) {
		if (sscanf(entry->key, "pv%u", &index) == 1 &&
		    !find_pv_by_index(container, index)) {
			create_missing_pv(entry, container);
		}
		entry = entry->next;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_available_objects
 *
 * Get a list of all available objects that could be used to create or expand
 * a container. All unused disks, segments, and regions with DATA_TYPE that
 * are valid input objects qualify. If a container is specified, the search
 * is limited to that container's disk-group (or to objects with no
 * disk-group when the container has none), and regions produced by this
 * plugin from that same container are filtered out.
 *
 * On success the list is returned through *objects and 0 is returned. On
 * failure the engine's error code is returned and *objects is not examined,
 * since it may not have been populated.
 **/
int get_available_objects(storage_container_t *container,
			  list_anchor_t *objects)
{
	storage_container_t *disk_group;
	object_search_flags_t flags;
	list_element_t iter1, iter2;
	storage_object_t *object;
	int rc;

	LOG_ENTRY();

	disk_group = (container) ? container->disk_group : NULL;
	flags = VALID_INPUT_OBJECT |
		((container && !disk_group) ? NO_DISK_GROUP : 0);
	rc = EngFncs->get_object_list(DISK | SEGMENT | REGION, DATA_TYPE,
				      NULL, disk_group, flags, objects);
	if (rc) {
		/* Don't walk a list the engine failed to hand back. */
		goto out;
	}

	/* Don't expand containers with regions produced by that container. */
	LIST_FOR_EACH_SAFE(*objects, iter1, iter2, object) {
		if (object->plugin == my_plugin_record &&
		    object->producing_container == container) {
			EngFncs->remove_element(iter1);
		}
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * max_consecutive_extents_on_object
 *
 * Return the length of the longest run of consecutive available PEs on this
 * PV, or zero if no extent is available.
 **/
u_int64_t max_consecutive_extents_on_object(storage_object_t *object)
{
	pv_data_t *pv = object->consuming_private_data;
	physical_extent_t *extents = pv->pe_map;
	u_int64_t run_start = 0;
	u_int64_t run_end;
	u_int64_t longest = 0;

	LOG_ENTRY();
	LOG_DEBUG("Finding max number of consecutive PEs on object %s.\n",
		  object->name);

	while (run_start < pv->pe_count) {
		/* Extend the run for as long as extents stay available. */
		run_end = run_start;
		while (run_end < pv->pe_count &&
		       extent_is_available(&extents[run_end])) {
			run_end++;
		}

		if (run_end - run_start > longest) {
			longest = run_end - run_start;
		}

		/* run_end is either past the map or an unavailable extent;
		 * in both cases the next run starts after it.
		 */
		run_start = run_end + 1;
	}

	LOG_EXIT_U64(longest);
	return longest;
}

/**
 * consecutive_extents_at_pe
 *
 * Return the number of consecutive available PEs on the given PV, counting
 * from pe_index up to the first unavailable extent or the end of the PE
 * map. Returns zero if the extent at pe_index is unavailable or pe_index
 * lies at or beyond the end of the map.
 **/
u_int64_t consecutive_extents_at_pe(pv_data_t *pv_data, u_int64_t pe_index)
{
	u_int64_t pe = pe_index;
	u_int64_t run_length;

	LOG_ENTRY();
	LOG_DEBUG("Finding number of consecutive extents on object %s starting "
		  "at PE %"PRIu64".\n", pv_data->object->name, pe_index);

	while (pe < pv_data->pe_count &&
	       extent_is_available(pv_data->pe_map + pe)) {
		pe++;
	}
	run_length = pe - pe_index;

	LOG_EXIT_U64(run_length);
	return run_length;
}

