/* log.c - generic code for generating PIW log entries
 *
 * author(s): Tom Lord
 ****************************************************************
 * Copyright (C) 1998 UUNET Technologies, Inc.
 *
 * See the file "COPYING.PIW" for further information
 * about the copyright status of this work.
 */


#include "hackerlab/os/alloca.h"
#include "hackerlab/os/sys/wait.h"
#include "hackerlab/os/sys/socket.h"
#include "hackerlab/os/sys/un.h"
#include "hackerlab/os/netinet/in.h"
#include "hackerlab/os/arpa/inet.h"
#include "hackerlab/os/netdb.h"
#include "hackerlab/bugs/panic.h"
#include "hackerlab/char/str.h"
#include "hackerlab/mem/mem.h"
#include "hackerlab/fmt/cvt.h"
#include "hackerlab/fs/file-names.h"
#include "hackerlab/vu/vu.h"
#include "hackerlab/piw/variables.h"
#include "hackerlab/piw/log.h"



extern char ** environ;


/************************************************************************
 *(h1 "PIW Log Entries"
 *    :includes ("hackerlab/piw/piw.h"))
 * 
 * During a run of the instrumented program, PIW instrumentation can
 * produce a log file which is interpreted, reported, and summarized
 * by some of the PIW utility programs.
 */

/*(h2 "Specifying a Log File")
 * 
 * To cause an instrumented program to generate a log file, specify a
 * log file name by setting the environment variable `PIWLOG'.  If a
 * log already exists, it will not ordinarily be overwritten.  If you
 * set the environment variable `PIWLOGCLOBBER' to some value other
 * than 0, then existing log files will be replaced.
 * 
 * 	% PIWLOG=",log"
 * 	% PIWLOGCLOBBER=1 
 * 	% export PIWLOG PIWLOGCLOBBER
 */

/*(h3 "Writing Logs to Special Files")
 * 
 * If the variable `PIWLOG' is set to a string of one of the forms:
 * 
 * 	inet://<host>:<port>
 * 	unix://<path>
 * 
 * the instrumented program attempts to connect to a socket at the
 * indicated address, and then to write the log to that socket.
 * 
 * These values of `PIWLOG' are also special (and have obvious meanings):
 * 
 * 	stdout://
 * 	stderr://
 * 	descriptor://<n>
 * 
 * Note: to handle special log file names of the "inet:" and "unix:"
 * variety, the program "piw-glob" must either be installed where PIW
 * expects, or be present in the directory where "libpiw" was compiled.
 * "piw-glob" is ordinarily installed along with "libpiw".
 */

/*(h3 "Buffering")
 * 
 * By default, records are written immediately (without buffering).
 * The variable `piw_log_buffer_size' can be used to change that
 * behavior, reducing the number of system calls made by the PIW
 * run-time system:
 * 
 * 	 % PIWFLAGS="... piw_log_buffer_size=<n> ..."
 * 
 * Replace `<n>' with the number of bytes to buffer between writes.
 * 
 * Be aware that if your program exits abnormally, due to a fatal signal,
 * some buffered log entries might not be written.
 */

/************************************************************************
 *(h2 "Interpreting Log Files")
 * 
 * PIW log files are binary files.  To translate a binary log to a
 * plain-text log, use the command:
 * 
 *   % piw-printlog < ,log > ,text-log
 * 
 * A text log may be interesting to examine with a text editor, but it
 * is difficult to interpret ``by eye''.  To convert a text log into a
 * formatted report which summarizes and analyzes the information it
 * contains, use the command:
 * 
 *   % piw-analyze < ,text-log > ,report
 * 
 * It is possible to analyze a log file in real-time, as it is being
 * generated.  This can be especially useful while your program is
 * running under a symbolic debugger.
 * 
 * The program `piw-analyze' accepts the option `-r' (aka
 * `--real-time'):
 * 
 *   % PIWLOG="stdout://" yourprog | piw-printlog | piw-analyze -r
 * 
 * Given the `-r' flag, `piw-analyze' will print error and warning
 * messages as soon as the corresponding log entries arrive.  The
 * complete report will be printed in the usual way after the entire
 * log has been read.
 * 
 * When running under a window system, it may be useful to start
 * `piw-analyze' in one window, and the debugger in another, using a
 * unix-domain socket to connect your program to `piw-analyze':
 * 
 *   # In one window, print real-time messages to standard output
 *   # and store the final report in "report.txt"
 *   #
 *   % piw-printlog unix://~/.piw | piw-analyze -r -o report.txt
 * 
 *   # In another window, run your program, sending the log to 
 *   # the pipeline started above:
 *   #
 *   % PIWLOG="unix://~/.piw"
 *   % export PIWLOG
 *   % yourprog
 * 
 * or
 * 
 *   % PIWLOG="unix://~/.piw"
 *   % export PIWLOG
 *   % gdb yourprog
 *   (gdb) run 
 */

/*(h2 "Log File Formats")
 *
 * Several different kinds of PIW instrumentation generate log
 * records.  For simplicity, all of these records are merged into a
 * single stream of output.  For efficiency (to not slow down the
 * instrumented program any more than necessary) some of the logs
 * contain binary data.  For extensibility (to allow new kinds of
 * logging to be added without necessarily modifying programs that
 * already read the logs) a generic record format is defined.
 * 
 * The syntax of a log file is simply:
 * 
 * 	log-file	::   log-file record
 * 			   | <empty>
 * 
 * 	record		:: record-type <spaces> record-length "\n"
 * 			   data "\n"
 * 
 *      record-type	:: [-a-zA-Z0-9_][-a-zA-Z0-9_]*
 * 
 * 	record-length	::   [1-9][0-9]* 
 *			   | 0
 * 
 * 	data		:: arbitrary binary data
 * 
 * Note that the number of bytes of `data' is specified by the decimal
 * integer `record-length'.
 * 
 * That format allows unrecognized record types to be skipped.  It
 * allows arbitrary binary data in each record.  It avoids making
 * complex format conversions in the instrumented program.  It makes
 * it possible to examine a binary log with a text editor (if that
 * editor can handle binary data).
 * 
 * The instrumented program must be run on a machine of the same
 * architecture (or at least the same word size, byte order, and
 * structure layout conventions) as the programs that read the log.
 * 
 * To the greatest extent possible, only one program will ever read a
 * binary log: `piw-printlog'.  Its job is to translate a binary log
 * into a plain-text log, which other programs then interpret further.
 * A plain text log is a series of records, each represented by a list
 * in Scheme syntax.  The first element of each list is a record type.
 * The remaining elements are the fields of the record.
 * 
 * For example, the piw version of `malloc' logs calls using a
 * structure like this:
 * 
 * 	struct piw_malloc_record_data 	// record-type: malloc
 * 	{
 * 	  int call_number;
 * 	  int amt_requested;
 * 	  void * new_block;
 * 	};
 * 
 * A typical binary log file entry for that type of structure is:
 * 
 * 	malloc 12
 * 	^@^@^@^@^E^F^@^@^D`^@^H
 * 
 * The corresponding text log entry looks like:
 * 
 * 	(malloc 0 1541 134242308)
 * 
 * meaning:
 * 
 *	The first call to malloc (`call_number == 0')
 * 	requested 1541 bytes (`amt_requested == 1541')
 * 	and returned 0x8006004 (`new_block == 134242308')
 */

/****************************************************************
 * Initializing the Log File Descriptor.
 * 
 * There is some trickiness here.  We want to handle complex log
 * file names such as "unix://...", but, because this is low-level
 * support for functions such as `malloc', we must avoid calling
 * `malloc' or other complex parts of libc.
 * 
 * We use a subprocess, "piw-glob", to handle complex parts of
 * interpreting log file names.
 */

/* log_fd
 * 
 * The descriptor on which log file records are written.
 *
 * Two negative values are used as sentinels within this file:
 *
 *   -1   the log has not been initialized yet; `piw_log' calls
 *        `piw_init_log' when it sees this value.
 *
 *   -2   logging is disabled; `piw_log' returns without writing.
 *        This is set when $PIWLOG is unset or empty, and in the
 *        forked child that execs "piw-glob".
 */
static int log_fd = -1;

/* buffer, buffer_size, buffered
 * 
 * buffer_size	- the number of bytes of log file records to buffer.
 *		  Set by `piw_init_log' from the flag
 *		  `piw_log_buffer_size'.  While it is 0, no buffer is
 *		  allocated and `piw_log' writes every record directly.
 * buffer	- buffer space for log file records (`buffer_size'
 *		  bytes, allocated by `piw_init_log'), or 0.
 * buffered	- the number of bytes currently held in `buffer' that
 *		  have not yet been written to `log_fd'.
 *
 */
static t_uchar * buffer = 0;
static int buffer_size = 0;
static int buffered = 0;

/*
 * static void finish (void);
 * 
 * Exit-time hook (registered with `atexit' by `piw_init_log').
 * 
 * Writes out whatever log records are still sitting in `buffer'.
 * Nothing is done if the buffer is empty or no log descriptor is
 * open.  A failed write is ignored: the process is exiting anyway.
 */
static void
finish (void)
{
  int errn;

  if (!buffered || (log_fd < 0))
    return;

  vu_write_retry (&errn, log_fd, buffer, buffered);
}


/*
 * static void piw_try_exec_glob (char * dir, char ** argv);
 * 
 * Build the path "<dir>/piw-glob" and `execve' it with `argv' and
 * the current environment.  Panics if the path would not fit in
 * PATH_MAX characters.  Returns only if `execve' fails.
 */
static void
piw_try_exec_glob (char * dir, char ** argv)
{
  static char glob_name[] = "piw-glob";
  char path[PATH_MAX + 1];

  if ((str_length (dir) + 1 + str_length (glob_name)) > PATH_MAX)
    panic ("path to piw-glob too long");

  str_cpy (path, dir);
  str_cat (path, "/");
  str_cat (path, glob_name);
  execve (path, argv, environ);
}


/*
 * static void piw_run_piw_glob (int pipe_fd[2], char ** argv);
 * 
 * Replace the current process (a freshly forked child) with
 * "piw-glob".
 * 
 * `pipe_fd' -- the pipe shared with the parent.  The read end,
 * `pipe_fd[0]', is closed here; the write end is left open for
 * piw-glob.
 * 
 * `argv' -- the 0-terminated argument vector handed to piw-glob.
 * 
 * The program is looked for first in PROGDIR and then in BUILDDIR.
 * This function does not return: if neither `execve' succeeds, it
 * panics.
 */
static void
piw_run_piw_glob (int pipe_fd[2], char ** argv)
{
  static char installed_dir[] = PROGDIR;
  static char compiled_dir[] = BUILDDIR;

  log_fd = -2;	 		/* disable logging in the child process */
  close (pipe_fd[0]);

  piw_try_exec_glob (installed_dir, argv);
  piw_try_exec_glob (compiled_dir, argv);

  panic ("unable to exec piw-glob");
}


/*
 * static void piw_init_log (void);
 * 
 * Interpret $PIWLOG and $PIWLOGCLOBBER and set `log_fd' accordingly.
 * Called by `piw_log' while `log_fd' is still -1.
 * 
 * If the flag `piw_log_buffer_size' is non-zero, a buffer of that many
 * bytes is allocated first.
 * 
 * If $PIWLOG is unset or empty, `log_fd' is set to -2 (logging
 * disabled) and nothing else happens.  Otherwise the name is handled
 * as one of:
 * 
 *	stdout://		descriptor 1
 *	stderr://		descriptor 2
 *	descriptor://<n>	descriptor <n>
 *	inet://<host>:<port>	a connected TCP socket
 *	unix:<path>		a connected unix-domain socket
 *	anything else		an ordinary file, opened for writing
 * 
 * For "inet:" and "unix:" names, a "piw-glob" child process is run to
 * resolve the host or expand the path; its answer is read from a pipe.
 * 
 * Any failure is fatal (`panic').  On success, `finish' is registered
 * with `atexit' so that buffered records are flushed at exit.
 */
static void
piw_init_log (void)
{
  t_uchar * name;
  char * clobberspec;
  int clobber;

  buffer_size = piw_get_flag ("piw_log_buffer_size");
  if (buffer_size)
    {
      buffer = (t_uchar *)malloc (buffer_size);
      if (!buffer)
        panic ("unable to allocate piw log buffer");
    }

  name = getenv ("PIWLOG");
  if (!name || !name[0])
    {
      log_fd = -2;
      return;
    }

  /* Clobbering is enabled by any value of $PIWLOGCLOBBER other than "0". */
  clobberspec = getenv ("PIWLOGCLOBBER");
  clobber = clobberspec && str_cmp (clobberspec, "0");

  if (!str_cmp ("stdout://", name))
    log_fd = 1;
  else if (!str_cmp ("stderr://", name))
    log_fd = 2;
  else if (!str_cmp_prefix ("descriptor://", name))
    {
      int errn;
      t_uchar * fd_name;
      unsigned int fd_name_len;

      fd_name = name + sizeof ("descriptor://") - 1;
      fd_name_len = str_length (fd_name);
      if (0 > cvt_decimal_to_uint (&errn, &log_fd, fd_name, fd_name_len))
        panic ("bogus descriptor number in $PIWLOG");
    }
  else if (!str_cmp_prefix ("inet://", name))
    {
      t_uchar * host_and_port;
      unsigned int hplen;
      t_uchar * host;
      t_uchar * colon;
      t_uchar * port_name;
      unsigned int port_len;
      int port;
      struct sockaddr_in addr;
      int errn;

      /* Work on a private copy so that the ':' can be overwritten to
       * split "<host>:<port>" into two 0-terminated strings.
       */
      host_and_port = name + sizeof ("inet://") - 1;
      hplen = str_length (host_and_port);
      host = (t_uchar *)alloca (hplen + 1);
      mem_move (host, host_and_port, hplen + 1);
      colon = str_chr_index (host, ':');
      if (!colon)
        panic ("ill formed host address in $PIWLOG");
      *colon = 0;
      port_name = colon + 1;
      port_len = (host + hplen) - (colon + 1);
      if (0 > cvt_decimal_to_uint (&errn, &port, port_name, port_len))
        panic ("ill formed port address in $PIWLOG");
      /* `htons' would silently truncate anything wider than 16 bits. */
      if ((port < 0) || (port > 65535))
        panic ("ill formed port address in $PIWLOG");


      /* The functions gethostbyaddr and gethostbyname are likely to call
       * malloc which will cause a hopeless reentrancy problem.
       * So we fork a sub-process to handle those calls.
       */
      {
        int pipe_fd[2];
        int child;
        char inet[1024];

        if (pipe (pipe_fd) < 0)
          panic ("unable to create pipe in piw_init_log");

        child = fork ();
        if (child < 0)
          panic ("unable to fork in piw_init_log");
        if (child == 0)
          {
            char fd_name[PATH_MAX + 1];
            char fd_url[PATH_MAX + 1];
            char * argv[64];
            int argc;

            /* piw-glob -o fd:<write end of pipe> --gethost <host> */
            argc = 0;
            argv[argc++] = "piw-glob";
            argv[argc++] = "-o";
            str_cpy (fd_url, "fd:");
            cvt_long_to_decimal (fd_name, pipe_fd[1]);
            str_cat (fd_url, fd_name);
            argv[argc++] = fd_url;
            argv[argc++] = "--gethost";
            argv[argc++] = host;
            argv[argc++] = 0;
            piw_run_piw_glob (pipe_fd, argv);
          }
        else
          {
            int errn;
            int status;
            int len;

            close (pipe_fd[1]);
            /* Read at most sizeof (inet) - 1 bytes so there is always
             * room for the terminating 0 that `inet_aton' requires.
             */
            len = vu_read_retry (&errn, pipe_fd[0], inet, sizeof (inet) - 1);
            if (len < 0)
              panic ("unable to read network address for $PIWLOG");
            inet[len] = 0;
            close (pipe_fd[0]);
            if (0 > waitpid (child, &status, 0))
              panic ("error waiting for child process in piw_init_log");
          }
        addr.sin_family = AF_INET;
        addr.sin_port = htons (port);
        if (!inet_aton (inet, &addr.sin_addr))
          panic ("bizarre internet address returned by piw-glob");
      }
      log_fd = socket (PF_INET, SOCK_STREAM, 0);
      if (log_fd < 0)
        panic ("unable to create socket for $PIWLOG");
      if (0 > connect (log_fd, (struct sockaddr *)&addr, sizeof (addr)))
        panic ("unable to connect socket for $PIWLOG");
    }
  else if (!str_cmp_prefix ("unix:", name))
    {
      t_uchar * path;
      int path_len;
      struct sockaddr_un addr;
      int pipe_fd[2];
      int child;
      t_uchar expanded_path[PATH_MAX + 1];
      int errn;

      /* NOTE(review): only the "unix:" prefix is stripped here, so for a
       * name written as "unix://<path>" the leading "//" is passed on to
       * piw-glob as part of the path -- confirm that is intended.
       */
      path = name + sizeof ("unix:") - 1;
      path_len = str_length (path);
      addr.sun_family = AF_UNIX;

      /* Before setting addr.sun_path, twiddle-expand the file name.
       * But that can call malloc which will cause a hopeless reentrancy problem.
       * So we fork a sub-process to handle that expansion.
       */
      if (pipe (pipe_fd) < 0)
        panic ("unable to create pipe in piw_init_log");
      child = fork ();
      if (child < 0)
        panic ("unable to fork in piw_init_log");
      if (child == 0)
        {
          char fd_name[PATH_MAX + 1];
          char fd_url[PATH_MAX + 1];
          char * argv[64];
          int argc;

          /* piw-glob -o fd:<write end of pipe> --expand <path> */
          argc = 0;
          argv[argc++] = "piw-glob";
          argv[argc++] = "-o";
          str_cpy (fd_url, "fd:");
          cvt_long_to_decimal (fd_name, pipe_fd[1]);
          str_cat (fd_url, fd_name);
          argv[argc++] = fd_url;
          argv[argc++] = "--expand";
          argv[argc++] = path;
          argv[argc++] = 0;
          piw_run_piw_glob (pipe_fd, argv);
        }
      else
        {
          int len;
          int status;

          close (pipe_fd[1]);
          len = vu_read_retry (&errn, pipe_fd[0], expanded_path, PATH_MAX + 1);
          close (pipe_fd[0]);
          /* An empty answer (e.g. piw-glob could not be run) is as useless
           * as a read error, and would make the index below negative.
           */
          if (len <= 0)
            panic ("error reading expanded pathname for $PIWLOG");
          /* The output is expected to end with '\n', which is then at
           * expanded_path[len - 1]; replace it with the terminating 0.
           */
          expanded_path[len - 1] = 0;
          if (len + 1 > sizeof (addr.sun_path))
            panic ("$PIWLOG pathname too long for unix-domain socket name");
          if (0 > waitpid (child, &status, 0))
            panic ("error waiting for child process in piw_init_log");
          str_cpy (addr.sun_path, expanded_path);
        }
      log_fd = socket (PF_UNIX, SOCK_STREAM, 0);
      if (log_fd < 0)
        panic ("unable to create socket for $PIWLOG");
      if (0 > connect (log_fd, (struct sockaddr *)&addr, sizeof (addr)))
        panic ("unable to connect socket for $PIWLOG");
    }
  else 
    {
      /* Don't use "vu_open" here because that calls malloc.
       *
       * Without clobbering, O_EXCL makes the open fail if the log
       * already exists.
       */
      log_fd = open (name, (clobber ? 0 : O_EXCL) | O_CREAT | O_WRONLY, 0666);

      if (log_fd < 0)
        panic ("unable to open $PIWLOG");

      if (0 > ftruncate (log_fd, 0))
        panic ("unable to truncate $PIWLOG");
    }

  atexit (finish);
  /* piw_log_init (getpid ()); */
}


/************************************************************************
 *(h2 "Generating PIW Log Entries")
 * 
 */

/*(c piw_log)
 * void piw_log (char * record_name, void * data, int record_length);
 * 
 * Generate one PIW log entry.
 * 
 * This is low-level support for writing log entries.  This function
 * is usually not called directly.  See xref:"Defining New Log Record
 * Types".
 *
 * `record_name' -- a 0-terminated record name.  The name is not
 * checked for syntactic validity.
 *
 * `record_length' -- the number of bytes of data in the record.
 *
 * `data' -- the log data itself.
 * 
 * The first time it is called, this function will check the
 * environment variables `PIWLOG' and `PIWLOGCLOBBER' to open the
 * log file.  See xref:"Specifying a Log File".
 */
void
piw_log (char * record_name, void * data, int record_length)
{
  t_uchar length_text[32];
  int length_text_len;
  int name_len;
  int record_size;
  t_uchar * out;
  t_uchar * cursor;
  int direct;
  int errn;

  /* Open the log lazily, on the first record. */
  if (log_fd == -1)
    piw_init_log ();

  /* -2 means logging is disabled. */
  if (log_fd == -2)
    return;

  /* The tail of the header line: " <record_length>\n". */
  length_text[0] = ' ';
  cvt_long_to_decimal (length_text + 1, record_length);
  str_cat (length_text, "\n");
  length_text_len = str_length (length_text);
  name_len = str_length (record_name);

  /* name + " <length>\n" + data + the "\n" that follows the data. */
  record_size = name_len + length_text_len + record_length + 1;

  /* If the record does not fit in the space left in the buffer, write
   * out what is buffered first, so that records stay in order.
   */
  if (buffered && (record_size > (buffer_size - buffered)))
    {
      if (0 > vu_write_retry (&errn, log_fd, buffer, buffered))
        panic ("write failure in piw_log");
      buffered = 0;
    }

  /* A record larger than the whole buffer (always the case when
   * buffering is off) is assembled on the stack and written directly.
   */
  if (record_size > buffer_size)
    {
      out = alloca (record_size);
      direct = 1;
    }
  else
    {
      out = buffer + buffered;
      direct = 0;
    }

  cursor = out;
  mem_move (cursor, record_name, name_len);
  cursor += name_len;
  mem_move (cursor, length_text, length_text_len);
  cursor += length_text_len;
  mem_move (cursor, data, record_length);
  cursor += record_length;
  *cursor = '\n';

  if (!direct)
    {
      buffered += record_size;
      return;
    }

  if (0 > vu_write_retry (&errn, log_fd, out, record_size))
    panic ("write failure in piw_log (2)");
}


/*(c piw_flush_log)
 * void piw_flush_log (void);
 * 
 * Flush (write) buffered PIW log entries.
 */
void
piw_flush_log (void)
{
  int errn;

  /* Nothing to do if the buffer is empty or no log descriptor is open. */
  if (!buffered || (log_fd < 0))
    return;

  if (vu_write_retry (&errn, log_fd, buffer, buffered) < 0)
    panic ("unable to flush PIW log");

  buffered = 0;
}




/*(h2 "Defining New Log Record Types")
 * 
 * At this time, if `piw-printlog' encounters a record type it does
 * not understand, it prints a warning message and skips the record.
 * If you define a new log record type, you must modify `piw-printlog'.
 * 
 * In the source directory for `piw-malloc' is a subdirectory called
 * `=scaffolding'.  It contains programs that automatically generate
 * all of the necessary code for `piw-printlog' from high-level
 * specifications.
 * 
 * In future releases, the process for modifying `piw-printlog' will 
 * be simplified.
 */

