/*-------------------------------------------------------------------------
 *
 * posix_shmem.c
 *	  Implement shared memory using POSIX facilities
 *
 * These routines represent a fairly thin layer on top of POSIX shared
 * memory functionality.
 *
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <signal.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#ifdef HAVE_KERNEL_OS_H
#include <kernel/OS.h>
#endif

#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"


#define IPCProtection	(0600)	/* access/modify by user only */
#define IPCNameLength		32	/* must be long enough to contain all possible format strings
								 * see GenerateIPCName */


/*
 * Key and attach address of the segment in use by this process.  Both are
 * set by PGSharedMemoryCreate; the address is reset to NULL by
 * PGSharedMemoryDetach.
 */
unsigned long UsedShmemSegID = 0;
void	   *UsedShmemSegAddr = NULL;

/* Prototypes for the file-local helpers defined below */
static void GenerateIPCName(int memKey, char *dest);
static void *InternalIpcMemoryCreate(int memKey, Size size);
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum memKey);
static PGShmemHeader *PGSharedMemoryAttach(int key);

/*
 *	GenerateIPCName(key, dest)
 *
 * Generate a shared memory object key name using the argument key.
 * This uses the magic number and text to prevent collisions from other
 * apps. 
 */
static void
GenerateIPCName(int memKey, char *dest)
{
	/* This must be 31 characters or less for portability (i.e. Mac OS X) */
	sprintf(dest, "PostgreSQL.%lx.%lx", PGShmemMagic, memKey);
}

/*
 *	InternalIpcMemoryCreate(memKey, size)
 *
 * Attempt to create a new shared memory segment with the specified key.
 * Will fail (return NULL) if such a segment already exists.  If successful,
 * attach the segment to the current process and return its attached address.
 * On success, callbacks are registered with on_shmem_exit to detach and
 * delete the segment when on_shmem_exit is called.
 *
 * If we fail with a failure code other than collision-with-existing-segment,
 * print out an error and abort.  Other types of errors are not recoverable.
 * That includes failure to size or map the freshly created object; the
 * delete callback is registered before those steps so that the half-built
 * object is still handed to IpcMemoryDelete.
 */
static void *
InternalIpcMemoryCreate(int memKey, Size size)
{
	int			fd;
	int			save_errno;
	void	   *memAddress;
	char		keyName[IPCNameLength];
	struct stat statbuf;

	GenerateIPCName(memKey, keyName);
	fd = shm_open(keyName, O_RDWR | O_CREAT | O_EXCL, IPCProtection);

	if (fd < 0)
	{
		/*
		 * Fail quietly if error indicates a collision with existing segment.
		 * One would expect EEXIST, given that we said O_EXCL.  EACCES is
		 * treated the same way.
		 */
		if (errno == EEXIST || errno == EACCES)
			return NULL;

		/*
		 * Else complain and abort.  Report the complete set of flags that
		 * was actually passed to shm_open.
		 */
		ereport(FATAL,
				(errmsg("could not create shared memory segment: %m"),
		  errdetail("Failed system call was shm_open(name=%s, oflag=%lu, mode=%lu).",
					keyName, (unsigned long) (O_RDWR | O_CREAT | O_EXCL),
					(unsigned long) IPCProtection),
				 (errno == EMFILE) ?
				 errhint("This error means that the process has reached its limit "
						 "for open file descriptors.") : 0,
				 (errno == ENOSPC) ?
				 errhint("This error means the process has ran out of address "
						 "space.") : 0));
	}

	/* Register on-exit routine to delete the new segment */
	on_shmem_exit(IpcMemoryDelete, Int32GetDatum(memKey));

	/*
	 * Grow the object to the desired length.  mmap does not verify that the
	 * object can back the whole mapping, so a failure here has to be
	 * reported now instead of surfacing later as a fault on first access.
	 * errno is saved around close() so that %m reports the real cause.
	 */
	if (fstat(fd, &statbuf) < 0)
	{
		save_errno = errno;
		close(fd);
		errno = save_errno;
		elog(FATAL, "fstat(%s) failed: %m", keyName);
	}

	if ((Size) statbuf.st_size < size && ftruncate(fd, (off_t) size) < 0)
	{
		save_errno = errno;
		close(fd);
		errno = save_errno;
		ereport(FATAL,
				(errmsg("could not resize shared memory segment \"%s\" to %lu bytes: %m",
						keyName, (unsigned long) size)));
	}

	/* OK, should be able to attach to the segment */
	memAddress = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	/* The mapping, if any, stays valid after the descriptor is closed */
	save_errno = errno;
	close(fd);

	if (memAddress == MAP_FAILED)
	{
		errno = save_errno;
		elog(FATAL, "mmap(fd=%d) failed: %m", fd);
	}

	/* Register on-exit routine to detach new segment before deleting */
	on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

	/* Record key and ID in lockfile for data directory. */
	RecordSharedMemoryInLockFile((unsigned long) memKey, 0);

	return memAddress;
}

/*
 *	IpcMemoryDetach(status, shmaddr)
 *
 * Remove a shared memory segment from this process' address space.
 * shmaddr carries the attach address; the length to unmap is taken from
 * the totalsize field of the PGShmemHeader found at that address.  A
 * failure is only logged.
 *
 * (called as an on_shmem_exit callback, hence funny argument list; status
 * is not used)
 */
static void
IpcMemoryDetach(int status, Datum shmaddr)
{
	PGShmemHeader *segHeader = (PGShmemHeader *) DatumGetPointer(shmaddr);

	if (munmap(DatumGetPointer(shmaddr), segHeader->totalsize) >= 0)
		return;

	elog(LOG, "munmap(%p, ...) failed: %m", DatumGetPointer(shmaddr));
}

/****************************************************************************/
/*	IpcMemoryDelete(status, memKey)		deletes a shared memory segment		*/
/*	(called as an on_shmem_exit callback, hence funny argument list)		*/
/****************************************************************************/
static void
IpcMemoryDelete(int status, Datum memKey)
{
	char		keyName[IPCNameLength];

	/*
	 * The key was packed with Int32GetDatum when this callback was
	 * registered, so unpack it the matching way rather than relying on an
	 * implicit Datum-to-int conversion.
	 */
	GenerateIPCName(DatumGetInt32(memKey), keyName);

	/* Removing the name is best-effort at exit time: just log a failure */
	if (shm_unlink(keyName) < 0)
		elog(LOG, "shm_unlink(%s) failed: %m", keyName);
}

/*
 * PGSharedMemoryIsInUse
 *
 * Is a previously-existing shmem segment still existing and in use?
 *
 * The point of this exercise is to detect the case where a prior postmaster
 * crashed, but it left child backends that are still running.	Therefore
 * we only care about shmem segments that are associated with the intended
 * DataDir.  This is an important consideration since accidental matches of
 * shmem segment IDs are reasonably common.
 *
 * id1 is the key from which the segment name is generated; id2 is not used
 * by this implementation.  Returns false only when the segment is known to
 * be absent or known not to belong to our data directory; every doubtful
 * case returns true.
 */
bool
PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
{
	char		keyName[IPCNameLength];
	int			fd;

#ifndef WIN32
	PGShmemHeader *hdr;
	bool		headerMismatch;
	struct stat dirStat;
	struct stat segStat;
#endif

	GenerateIPCName((int) id1, keyName);

	/*
	 * The segment is considered in use if it (a) exists and (b) carries a
	 * header that identifies our data directory.
	 */
	fd = shm_open(keyName, O_RDWR, 0);
	if (fd < 0)
	{
		/*
		 * ENOENT means the segment no longer exists.
		 */
		if (errno == ENOENT)
			return false;

		/*
		 * EACCES implies that the segment belongs to some other userid, which
		 * means it is not a Postgres shmem segment that is relevant to our
		 * data directory.
		 */
		if (errno == EACCES)
			return false;

		/*
		 * Otherwise, we had better assume that the segment is in use.
		 */
		return true;
	}

	/*
	 * Try to attach to the segment and see if it matches our data directory.
	 * This avoids key-conflict problems on machines that are running
	 * several postmasters under the same userid.  On Windows, which doesn't
	 * have useful inode numbers, we can't do this so we punt and assume there
	 * is a conflict.
	 */
#ifdef WIN32
	close(fd);
	return true;
#else
	if (stat(DataDir, &dirStat) < 0)
	{
		close(fd);
		return true;			/* if can't stat, be conservative */
	}

	/*
	 * Make sure the object is big enough to hold a header before reading
	 * one: touching a mapping beyond the end of the object would fault.
	 */
	if (fstat(fd, &segStat) < 0)
	{
		close(fd);
		return true;			/* if can't stat, be conservative */
	}
	if (segStat.st_size < (off_t) sizeof(PGShmemHeader))
	{
		close(fd);
		return false;			/* too small to carry a Postgres header */
	}

	hdr = (PGShmemHeader *) mmap(NULL, sizeof(PGShmemHeader), PROT_READ, MAP_SHARED, fd, 0);
	close(fd);

	if (hdr == MAP_FAILED)
		return true;			/* if can't attach, be conservative */

	headerMismatch = hdr->magic != PGShmemMagic ||
		hdr->device != dirStat.st_dev ||
		hdr->inode != dirStat.st_ino;

	/* Done with the header; unmap it exactly once */
	munmap((void *) hdr, sizeof(PGShmemHeader));

	if (headerMismatch)
	{
		/*
		 * It's either not a Postgres segment, or not one for my data
		 * directory.  In either case it poses no threat.
		 */
		return false;
	}

	/* Trouble --- looks a lot like there's still live backends */
	return true;
#endif   /* WIN32 */
}


/*
 * PGSharedMemoryCreate
 *
 * Create a shared memory segment of the requested size, fill in its
 * standard header and return a pointer to that header.  The creation
 * helper registers the on_shmem_exit callbacks that release the storage.
 *
 * Keys are probed upward starting at port * 1000 + 1 (a standalone backend
 * passes port zero).  A key whose segment cannot be created is skipped
 * unless the existing segment looks like a leftover Postgres segment, i.e.
 * it carries a Postgres header and its creator is either this process or a
 * process that no longer exists; such a segment is removed and the key is
 * re-used.  Collisions with non-Postgres segments are not errors.
 *
 * makePrivate means to always create a new segment, rather than attach to
 * or recycle any existing segment.
 */
PGShmemHeader *
PGSharedMemoryCreate(Size size, bool makePrivate, int port)
{
	int			segKey;
	void	   *segAddr;
	PGShmemHeader *segHdr;
	char		segName[IPCNameLength];
	bool		ownerAlive;

#ifndef WIN32
	struct stat dirStat;
#endif

	/* The segment must have room for more than just the header */
	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));

	/* Don't hand PGSharedMemoryAttach a stale address to aim for */
	UsedShmemSegAddr = NULL;

	/* Probe keys until one yields a freshly created segment */
	for (segKey = port * 1000 + 1;; segKey++)
	{
		/* First choice: the key is free and we simply create the segment */
		segAddr = InternalIpcMemoryCreate(segKey, size);
		if (segAddr != NULL)
			break;

		/* A standalone backend never inspects or recycles segments */
		if (makePrivate)
			continue;

		/* If we cannot attach, the segment is not one of ours */
		segAddr = PGSharedMemoryAttach(segKey);
		if (segAddr == NULL)
			continue;

		/*
		 * Leave the segment alone if somebody else created it and that
		 * process still exists (or we cannot tell that it is gone).
		 */
		segHdr = (PGShmemHeader *) segAddr;
		ownerAlive = segHdr->creatorPID != getpid() &&
			(kill(segHdr->creatorPID, 0) == 0 || errno != ESRCH);

		/* Either way we are finished looking at the old contents */
		munmap(segAddr, segHdr->totalsize);

		if (ownerAlive)
			continue;

		/*
		 * The creator is dead, or the segment stems from an earlier cycle
		 * of life in this very process.  Try to remove it; should that
		 * fail, treat the segment as foreign and move on quietly.
		 */
		GenerateIPCName(segKey, segName);
		if (shm_unlink(segName) < 0)
			continue;

		/*
		 * The key should be free now.  If creation still fails, another
		 * process grabbed the key first; let it keep that one and go on
		 * to the next key.
		 */
		segAddr = InternalIpcMemoryCreate(segKey, size);
		if (segAddr != NULL)
			break;
	}

	/*
	 * We own a brand-new segment.  Stamp it as ours; the PID must be stored
	 * before the magic number so that no other Postgres process can see a
	 * valid-looking header that names an invalid PID.
	 */
	segHdr = (PGShmemHeader *) segAddr;
	segHdr->creatorPID = getpid();
	segHdr->magic = PGShmemMagic;

#ifndef WIN32
	/* Record which data directory this segment belongs to */
	if (stat(DataDir, &dirStat) < 0)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg("could not stat data directory \"%s\": %m",
						DataDir)));
	segHdr->device = dirStat.st_dev;
	segHdr->inode = dirStat.st_ino;
#endif

	/* Set up the space allocation bookkeeping */
	segHdr->totalsize = size;
	segHdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));

	/* Remember where and under which key we are attached */
	UsedShmemSegAddr = segAddr;
	UsedShmemSegID = (unsigned long) segKey;

	return segHdr;
}

#ifdef EXEC_BACKEND

/*
 * PGSharedMemoryReAttach
 *
 * Re-attach to an already existing shared memory segment.	In the non
 * EXEC_BACKEND case this is not used, because postmaster children inherit
 * the shared memory segment attachment via fork().
 *
 * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
 * routine.  The caller must have already restored them to the postmaster's
 * values.  The segment has to come back at exactly the address stored in
 * UsedShmemSegAddr; any other outcome is reported as FATAL.
 */
void
PGSharedMemoryReAttach(void)
{
	void	   *hdr;
	void	   *origUsedShmemSegAddr = UsedShmemSegAddr;

	Assert(UsedShmemSegAddr != NULL);
	Assert(IsUnderPostmaster);

#ifdef __CYGWIN__
	/* cygipc (currently) appears to not detach on exec. */
	PGSharedMemoryDetach();
	/* PGSharedMemoryDetach cleared the address; put it back for the attach */
	UsedShmemSegAddr = origUsedShmemSegAddr;
#endif

	elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
	hdr = (void *) PGSharedMemoryAttach((int) UsedShmemSegID);
	if (hdr == NULL)
		elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
			 (int) UsedShmemSegID, UsedShmemSegAddr);
	if (hdr != origUsedShmemSegAddr)
		elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
			 hdr, origUsedShmemSegAddr);

	UsedShmemSegAddr = hdr;		/* probably redundant */
}
#endif   /* EXEC_BACKEND */

/*
 * PGSharedMemoryDetach
 *
 * Drop this process' attachment to the shared memory segment, if it still
 * has one.  The creator of the segment is not expected to call this (it has
 * an on_shmem_exit callback registered for that purpose); it is meant for
 * subprocesses that inherited an attachment and want to be rid of it.
 *
 * The length to unmap comes from the segment header.  UsedShmemSegAddr is
 * cleared afterwards whether or not the unmap succeeded.
 */
void
PGSharedMemoryDetach(void)
{
	PGShmemHeader *segHeader;

	/* Nothing to do when we are not attached */
	if (UsedShmemSegAddr == NULL)
		return;

	segHeader = (PGShmemHeader *) UsedShmemSegAddr;
	if (munmap(UsedShmemSegAddr, segHeader->totalsize) < 0)
		elog(LOG, "munmap(%p) failed: %m", UsedShmemSegAddr);

	UsedShmemSegAddr = NULL;
}


/*
 * Attach to shared memory and make sure it has a Postgres header
 *
 * The segment is looked up by the name generated from key.  Only the header
 * is mapped at first; once it proves to be a Postgres header the segment is
 * re-mapped with the total size recorded in it.  UsedShmemSegAddr is passed
 * to mmap as the requested address for both mappings.
 *
 * Returns attach address if OK, else NULL.  When the final mapping fails,
 * errno is left as set by mmap so that callers can report it.
 */
static PGShmemHeader *
PGSharedMemoryAttach(int key)
{
	PGShmemHeader *hdr;
	char		keyName[IPCNameLength];
	struct stat statbuf;
	Size		size;
	int			fd;
	int			save_errno;

	GenerateIPCName(key, keyName);
	if ((fd = shm_open(keyName, O_RDWR, 0)) < 0)
		return NULL;

	/*
	 * An object too small to hold a header cannot be one of ours, and
	 * reading a header from it would touch memory beyond the end of the
	 * object.
	 */
	if (fstat(fd, &statbuf) < 0 ||
		statbuf.st_size < (off_t) sizeof(PGShmemHeader))
	{
		close(fd);
		return NULL;
	}

	hdr = (PGShmemHeader *) mmap(UsedShmemSegAddr, sizeof(PGShmemHeader),
								 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (hdr == MAP_FAILED)
	{
		close(fd);
		return NULL;			/* failed: must be some other app's */
	}

	if (hdr->magic != PGShmemMagic)
	{
		close(fd);
		munmap((void *) hdr, sizeof(PGShmemHeader));
		return NULL;			/* segment belongs to a non-Postgres app */
	}

	/*
	 * Since the segment has a valid Postgres header, unmap and re-map it
	 * with the proper size
	 */
	size = hdr->totalsize;
	munmap((void *) hdr, sizeof(PGShmemHeader));
	hdr = (PGShmemHeader *) mmap(UsedShmemSegAddr, size,
								 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	/* Keep mmap's errno intact across close() for the caller's benefit */
	save_errno = errno;
	close(fd);

	if (hdr == MAP_FAILED)
	{
		/* Never hand MAP_FAILED back as if it were an attach address */
		errno = save_errno;
		return NULL;
	}

	return hdr;
}
