/*
 * nasd_control.c
 *
 * Operations on NASD control objects.
 *
 * Authors: Jim Zelenka, David Rochberg
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1997,1998,1999,2000.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */


#include <nasd/nasd_options.h>
#include <nasd/nasd_drive_options.h>

#include <nasd/nasd_types.h>
#include <nasd/nasd_freelist.h>
#include <nasd/nasd_itypes.h>
#include <nasd/nasd_mem.h>
#include <nasd/nasd_cache.h>
#include <nasd/nasd_common.h>
#include <nasd/nasd_timer.h>
#include <nasd/nasd_trace_dr.h>
#include <nasd/nasd_drive_io.h>
#include <nasd/nasd_layout.h>
#include <nasd/nasd_ioqueue.h>
#include <nasd/nasd_control_marshall.h>
#include <nasd/nasd_types_marshall.h>
#include <nasd/nasd_drive_build_stamp.h>

/*
 * See nasd_decompose.c for a list of what node numbers
 * correspond to what objects.
 */

/*
 * Freelist of nasd_pagebuf_t buffers used by the control-object
 * read/write paths in this file. Created and primed by
 * nasd_obj_control_init(), destroyed by nasd_obj_control_shutdown().
 */
nasd_freelist_t *nasd_free_pagebuf = NULL;
/*
 * Tuning values handed to NASD_FREELIST_CREATE / NASD_FREELIST_PRIME in
 * nasd_obj_control_init(). Presumably: cap on cached free buffers,
 * growth increment, and number of buffers preallocated -- confirm
 * against the freelist macros in nasd_freelist.h.
 */
#define NASD_MAX_FREE_PAGEBUF  4
#define NASD_PAGEBUF_INC       2
#define NASD_PAGEBUF_INITIAL   2

/* Added to NPT slot numbers when nasd_read_part_objs() builds object ids. */
extern nasd_nodenum_t nasd_reserved_nodes;
/* Per-operation statistics served (and overwritten) as drive-info page 8192. */
extern nasd_ctrl_drive_opstats_t nasd_drive_opstats;


/*
 * Shutdown hook for the control-object module: destroys the pagebuf
 * freelist. Registered from nasd_obj_control_init(), which also calls
 * it directly if registration fails.
 *
 * arg is ignored.
 */
void
nasd_obj_control_shutdown(void *arg)
{
  (void)arg; /* unused */

  NASD_FREELIST_DESTROY(nasd_free_pagebuf, next, (nasd_pagebuf_t *));
}


/*
 * Initialize the control-object module.
 *
 * Checks that every control structure served as an info page is
 * exactly NASD_INFO_PAGESIZE bytes, creates and primes the pagebuf
 * freelist, and registers nasd_obj_control_shutdown() as a shutdown
 * procedure.
 *
 * Returns NASD_SUCCESS, NASD_NO_MEM if the freelist could not be
 * created, or the status from nasd_shutdown_proc() if registration
 * fails (in which case the freelist is torn down again first).
 */
nasd_status_t
nasd_obj_control_init()
{
  nasd_status_t status;

  /* Each of these is marshalled into/out of a single info page. */
  NASD_ASSERT(sizeof(nasd_ctrl_drive_info_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_part_info_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_drive_opstats_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_cache_stat_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_layout_stat_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_ioqueue_stat_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_io_stat_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_io_size_stat_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(sizeof(nasd_ctrl_trace_basic_t) == NASD_INFO_PAGESIZE);
  NASD_ASSERT(NASD_DRIVE_CACHE_TYPES == NASD_ODC_T_NUM);

#if 0
  {
    nasd_ctrl_drive_opstats_t *p = &nasd_drive_opstats;
    nasd_printf("\n\nXXX.  Fix this.  size of ops in opstats: %d\n\n",
                (&p->last_operation_placeholder - &p->null)
                ); /*DB*/
  }
#endif /* active debug */

  /* Set up the pool of scratch page buffers. */
  NASD_FREELIST_CREATE(nasd_free_pagebuf, NASD_MAX_FREE_PAGEBUF,
    NASD_PAGEBUF_INC, sizeof(nasd_pagebuf_t));
  if (nasd_free_pagebuf == NULL) {
    return(NASD_NO_MEM);
  }
  NASD_FREELIST_PRIME(nasd_free_pagebuf, NASD_PAGEBUF_INITIAL,next,
    (nasd_pagebuf_t *));

  status = nasd_shutdown_proc(nasd_odc_shutdown, nasd_obj_control_shutdown,
    NULL);
  if (!status) {
    return(NASD_SUCCESS);
  }

  /* Could not register the hook: clean up by hand before failing. */
  nasd_obj_control_shutdown(NULL);
  return(status);
}


nasd_status_t
nasd_write_drive_info(
  int                       partnum,
  nasd_offset_t             offset,
  nasd_len_t                in_len, 
  nasd_procpipe_t          *byte_pipe,
  nasd_len_t               *out_len)
{
  nasd_status_t         rc;
  nasd_len_t            bytes_recvd_this_time;
  nasd_len_t            bytes_recvd;
  nasd_offset_t         cur_offset;
  nasd_pagebuf_t       *pb=NULL;
  nasd_pagebuf_t       *pb_net=NULL;
  int                   i, off;
  nasd_opstat_t        *p,*q;
  char                 *b1, *b2, *b3;
  nasd_timer_t          tm;
  nasd_timespec_t       ts;
  unsigned long         l1, l2;

  /*
   * Read pagesized chunks on pagealigned boundaries.
   * In other words, whole pages only, please.
   */
  if (in_len % NASD_INFO_PAGESIZE)
    return(NASD_BAD_LEN);
  if (offset % NASD_INFO_PAGESIZE)
    return(NASD_BAD_OFFSET);

  rc = NASD_SUCCESS;
  
  cur_offset = offset;
  NASD_FREELIST_GET(nasd_free_pagebuf,pb,next,(nasd_pagebuf_t *));
  if (pb == NULL) {
    rc = NASD_NO_MEM;
    goto done;
  }
  NASD_FREELIST_GET(nasd_free_pagebuf,pb_net,next,(nasd_pagebuf_t *));
  if (pb_net == NULL) {
    rc = NASD_NO_MEM;
    goto done;
  }
  while((rc == NASD_SUCCESS) && ((*out_len) < in_len)) {
    switch(offset) {
    case 0:
      rc = NASD_BAD_OFFSET;
      goto done;
      /* NOTREACHED */
      break;

    case 8192:
      bytes_recvd=0;
      NASD_TM_START(&tm);
      do {
        rc = byte_pipe->pull(byte_pipe->state,
          (nasd_byte_t *)pb_net->buf+bytes_recvd,
          8192-bytes_recvd, &bytes_recvd_this_time, NULL,
          NULL, NULL, NULL);
        if (rc)
          break;
        bytes_recvd+=bytes_recvd_this_time;
      } while (8192-bytes_recvd > 0);
      NASD_TM_STOP_ELAP_TS(&tm,&ts);
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.write_pipe_stall_time,
        &ts);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.write_bytes, bytes_recvd);
      if (rc)
        break;
      nasd_ctrl_drive_opstats_t_unmarshall(pb_net->buf,
        (nasd_ctrl_drive_opstats_t *)pb->buf);
      p = &nasd_drive_opstats.null;
      q = &(((nasd_ctrl_drive_opstats_t *)pb->buf)->null);

      for (i=0;i<(&nasd_drive_opstats.last_operation_placeholder - &nasd_drive_opstats.null);i++) {
/*CO      for (i=0;i<22;i++) {*/
        p->num_ops = q->num_ops;
        p->op_nsecs = q->op_nsecs;
        p->min_nsecs = q->min_nsecs;
        p->max_nsecs = q->max_nsecs;
        p->invalid = q->invalid;
        p++;q++;
      }

      /* XXX opdepths? */

      break;

    case 16384:
      bytes_recvd=0;

      NASD_TM_START(&tm);
      do {
        rc = byte_pipe->pull(byte_pipe->state,
          (nasd_byte_t *)pb_net->buf+bytes_recvd,
          8192-bytes_recvd, &bytes_recvd_this_time, NULL,
          NULL, NULL, NULL);
        if (rc)
          break;
        bytes_recvd+=bytes_recvd_this_time;
      } while (8192-bytes_recvd > 0);
      NASD_TM_STOP_ELAP_TS(&tm,&ts);
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.write_pipe_stall_time,
        &ts);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.write_bytes, bytes_recvd);
      nasd_ctrl_cache_stat_t_unmarshall(pb_net->buf,
        (nasd_ctrl_cache_stat_t *)pb->buf);

      b1 = (char *)&nasd_drive_cache_stats;
      b3 = (char *)nasd_drive_cache_stats.lookup;
      b2 = (char *)pb->buf;
      l1 = (unsigned long)b1;
      l2 = (unsigned long)b3;
      off = l2-l1;
      bcopy(b2+off, b1+off, NASD_INFO_PAGESIZE-off);

      break;

    case 24576:
      bytes_recvd=0;

      NASD_TM_START(&tm);
      do {
        rc = byte_pipe->pull(byte_pipe->state,
          (nasd_byte_t *)pb_net->buf+bytes_recvd,
          8192-bytes_recvd, &bytes_recvd_this_time, NULL,
          NULL, NULL, NULL);
        if (rc)
          break;
        bytes_recvd+=bytes_recvd_this_time;
      } while (8192-bytes_recvd > 0);
      NASD_TM_STOP_ELAP_TS(&tm,&ts);
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.write_pipe_stall_time,
        &ts);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.write_bytes, bytes_recvd);
      nasd_ctrl_io_stat_t_unmarshall(pb_net->buf,
        (nasd_ctrl_io_stat_t *)pb->buf);

      /* don't overwrite ctrl id */
      b1 = (char *)&pb->buf[0];
      b2 = (char *)&nasd_drive_io_stats;
      bcopy(&b1[8], &b2[8], NASD_INFO_PAGESIZE-8);

      break;

    case 49152:
      bytes_recvd=0;

      NASD_TM_START(&tm);
      do {
        rc = byte_pipe->pull(byte_pipe->state,
          (nasd_byte_t *)pb_net->buf+bytes_recvd,
          8192-bytes_recvd, &bytes_recvd_this_time, NULL,
          NULL, NULL, NULL);
        if (rc)
          break;
        bytes_recvd+=bytes_recvd_this_time;
      } while (8192-bytes_recvd > 0);
      NASD_TM_STOP_ELAP_TS(&tm,&ts);
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.write_pipe_stall_time,
        &ts);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.write_bytes, bytes_recvd);
      nasd_ctrl_layout_stat_t_unmarshall(pb_net->buf,
        (nasd_ctrl_layout_stat_t *)pb->buf);

      /* don't overwrite ctrl_id or layout_name */
      b1 = (char *)&pb->buf[0];
      b2 = (char *)&nasd_drive_layout_stats;
      bcopy(&b1[72], &b2[72], NASD_INFO_PAGESIZE-72);

      break;

    case 57344:
      bytes_recvd=0;

      NASD_TM_START(&tm);
      do {
        rc = byte_pipe->pull(byte_pipe->state,
          (nasd_byte_t *)pb_net->buf+bytes_recvd,
          8192-bytes_recvd, &bytes_recvd_this_time, NULL,
          NULL, NULL, NULL);
        if (rc)
          break;
        bytes_recvd+=bytes_recvd_this_time;
      } while (8192-bytes_recvd > 0);
      NASD_TM_STOP_ELAP_TS(&tm,&ts);
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.write_pipe_stall_time,
        &ts);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.write_bytes, bytes_recvd);
      nasd_ctrl_ioqueue_stat_t_unmarshall(pb_net->buf,
        (nasd_ctrl_ioqueue_stat_t *)pb->buf);

      /* don't overwrite ctrl_id, ioqueue_name, or max_ios_outstanding */
      b1 = (char *)&pb->buf[0];
      b2 = (char *)&nasd_drive_ioqueue_stats;
      bcopy(&b1[76], &b2[76], NASD_INFO_PAGESIZE-76);

      break;


    default:
      rc = NASD_BAD_OFFSET;
      goto done;
      /* NOTREACHED */
      break;
    }
    *out_len+=NASD_INFO_PAGESIZE;
    offset+=NASD_INFO_PAGESIZE;
  }
  /* nasd_od_ops terminates the pipe*/
 done:
  if (pb) {
    NASD_FREELIST_FREE(nasd_free_pagebuf,pb,next);
  }
  if (pb_net) {
    NASD_FREELIST_FREE(nasd_free_pagebuf,pb_net,next);
  }

  return (rc);
}

/*
 * Write the partition-info control object for partition partnum.
 *
 * offset and in_len must both be multiples of NASD_INFO_PAGESIZE
 * (otherwise NASD_BAD_OFFSET / NASD_BAD_LEN). Only page 0 is writable:
 * it is pulled from byte_pipe, unmarshalled as a nasd_ctrl_part_info_t,
 * and its part_size, min_protection and first_obj are handed to
 * nasd_od_change_partition(). Any other page fails with
 * NASD_BAD_OFFSET.
 *
 * *out_len is advanced by NASD_INFO_PAGESIZE for each page applied
 * successfully; it is not reset here (nasd_obj_control_write_simple()
 * zeroes it before dispatching).
 *
 * Returns NASD_SUCCESS, NASD_BAD_LEN, NASD_BAD_OFFSET, NASD_NO_MEM, the
 * failing status from the pipe, or the status of
 * nasd_od_change_partition().
 */
nasd_status_t
nasd_write_part_info(
  int                       partnum,
  nasd_offset_t             offset,
  nasd_len_t                in_len, 
  nasd_procpipe_t          *byte_pipe,
  nasd_len_t               *out_len)
{
  nasd_len_t bytes_recvd_this_time, bytes_recvd;
  nasd_ctrl_part_info_t *partinfo;
  /* must start out NULL: the cleanup code at `done' tests both */
  nasd_pagebuf_t *pb_net = NULL, *pb = NULL;
  nasd_timespec_t ts;
  nasd_status_t rc;
  nasd_timer_t tm;

  /*
   * Read pagesized chunks on pagealigned boundaries.
   * In other words, whole pages only, please.
   */
  if (in_len % NASD_INFO_PAGESIZE)
    return(NASD_BAD_LEN);
  if (offset % NASD_INFO_PAGESIZE)
    return(NASD_BAD_OFFSET);

  rc = NASD_SUCCESS;

  /* pb holds the unmarshalled page, pb_net the raw bytes off the wire */
  NASD_FREELIST_GET(nasd_free_pagebuf,pb,next,(nasd_pagebuf_t *));
  if (pb == NULL) {
    rc = NASD_NO_MEM;
    goto done;
  }
  NASD_FREELIST_GET(nasd_free_pagebuf,pb_net,next,(nasd_pagebuf_t *));
  if (pb_net == NULL) {
    rc = NASD_NO_MEM;
    goto done;
  }
  while((rc == NASD_SUCCESS) && ((*out_len) < in_len)) {
    switch(offset) {
      case 0:
        bytes_recvd = 0;

        /* keep pulling until a whole page has arrived */
        NASD_TM_START(&tm);
        do {
          rc = byte_pipe->pull(byte_pipe->state,
            (nasd_byte_t *)pb_net->buf+bytes_recvd,
            8192-bytes_recvd, &bytes_recvd_this_time, NULL,
            NULL, NULL, NULL);
          if (rc)
            break;
          bytes_recvd += bytes_recvd_this_time;
        } while (8192-bytes_recvd > 0);
        NASD_TM_STOP_ELAP_TS(&tm,&ts);
        NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.write_pipe_stall_time,
          &ts);
        NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.write_bytes, bytes_recvd);
        if (rc)
          goto done;
        nasd_ctrl_part_info_t_unmarshall(pb_net->buf,
          (nasd_ctrl_part_info_t *)pb->buf);
        partinfo = (nasd_ctrl_part_info_t *)pb->buf;

        rc = nasd_od_change_partition(partnum,
          partinfo->part_size, partinfo->min_protection,
          partinfo->first_obj);

        break;
      default:
        rc = NASD_BAD_OFFSET;
        goto done;
        /* NOTREACHED */
        break;
    }
    /*
     * Account for the page just applied and move on. Without this the
     * loop would never terminate after a successful page 0: it would
     * keep pulling from the pipe and re-applying the partition change.
     */
    if (rc == NASD_SUCCESS) {
      *out_len += NASD_INFO_PAGESIZE;
      offset += NASD_INFO_PAGESIZE;
    }
  }
  /* nasd_od_ops terminates the pipe*/
done:
  if (pb) {
    NASD_FREELIST_FREE(nasd_free_pagebuf,pb,next);
  }
  if (pb_net) {
    NASD_FREELIST_FREE(nasd_free_pagebuf,pb_net,next);
  }

  return (rc);
}

/*
 * Serve one or more pages of the drive-info control object.
 *
 * offset and in_len must be multiples of NASD_INFO_PAGESIZE
 * (NASD_BAD_OFFSET / NASD_BAD_LEN otherwise). For every requested page
 * the matching structure is marshalled into pb_net->buf and pushed
 * down byte_pipe:
 *
 *       0  drive info (built in pb->buf under the disk lock)
 *    8192  per-operation statistics
 *   16384  cache statistics (timestamp, idle time and memory refreshed)
 *   24576  I/O statistics
 *   32768  I/O read-size statistics
 *   40960  I/O write-size statistics
 *   49152  layout statistics
 *   57344  I/O queue statistics
 *   other  a page of zeroes
 *
 * Pipe stall time and byte counts are charged to the read or read2
 * counters of nasd_drive_cache_stats according to is_read2. *out_len
 * grows by NASD_INFO_PAGESIZE per page pushed successfully; the caller
 * is expected to have initialized it. partnum is unused.
 *
 * Returns NASD_SUCCESS or the first failing status from the pipe.
 */
nasd_status_t
nasd_read_drive_info(
  nasd_pagebuf_t           *pb,
  nasd_pagebuf_t           *pb_net,
  int                       partnum,
  nasd_offset_t             offset,
  nasd_len_t                in_len,
  int                       is_read2,
  nasd_procpipe_t          *byte_pipe,
  nasd_len_t               *out_len)
{
  nasd_ctrl_drive_info_t *dinfo;
  nasd_od_part_t *cur_part;
  nasd_timespec_t stall;
  nasd_status_t status;
  nasd_timer_t timer;
  int pn;

  /* Whole pages on page boundaries only. */
  if (in_len % NASD_INFO_PAGESIZE) {
    return(NASD_BAD_LEN);
  }
  if (offset % NASD_INFO_PAGESIZE) {
    return(NASD_BAD_OFFSET);
  }

  status = NASD_SUCCESS;

  /* The step expression only runs after a page went out cleanly. */
  for (; (*out_len) < in_len;
       *out_len += NASD_INFO_PAGESIZE, offset += NASD_INFO_PAGESIZE)
  {
    switch(offset) {
      case 0:
        dinfo = (nasd_ctrl_drive_info_t *)pb->buf;
        dinfo->ctrl_id = NASD_CTRL_DRIVE_INFO;
        dinfo->max_parts = NASD_OD_MAXPARTS;
        dinfo->blocksize = NASD_OD_BASIC_BLOCKSIZE;
        dinfo->num_parts = 0;
        bcopy(nasd_drive_build_stamp,dinfo->build_stamp,sizeof(dinfo->build_stamp));
        NASD_ODC_LOCK_DISK();
        /* a partition counts as present when its size is non-zero */
        for(pn=0;pn<NASD_OD_MAXPARTS;pn++) {
          cur_part = &PART(pn);
          if (cur_part->part_size)
            dinfo->num_parts++;
        }
        dinfo->num_blocks = nasd_od_blocks;
        dinfo->blocks_allocated = nasd_odc_state->disk->blocks_allocated;
        NASD_ODC_UNLOCK_DISK();
        nasd_ctrl_drive_info_t_marshall(
          (nasd_ctrl_drive_info_t *)pb->buf, pb_net->buf);
        break;

      case 8192:
        nasd_ctrl_drive_opstats_t_marshall(&nasd_drive_opstats, pb_net->buf);
        break;

      case 16384:
        /* refresh the volatile fields before shipping the page */
        nasd_gettime(&nasd_drive_cache_stats.timestamp);
        nasd_drive_cache_stats.idle.ts_sec = 0;
        nasd_drive_cache_stats.idle.ts_nsec = 0;
        nasd_drive_cache_stats.mem_allocated = nasd_mem_allocated;
#if NASD_IDLE_SUPPORT > 0
        {
          nasd_timespec_t idle_time;
          status = nasd_get_total_idle_time(&idle_time);
          if (status == NASD_SUCCESS) {
            nasd_drive_cache_stats.idle = idle_time;
          }
        }
#endif /* NASD_IDLE_SUPPORT > 0 */
        nasd_ctrl_cache_stat_t_marshall(&nasd_drive_cache_stats,
          pb_net->buf);
        break;

      case 24576:
        nasd_ctrl_io_stat_t_marshall(&nasd_drive_io_stats, pb_net->buf);
        break;

      case 32768:
        nasd_ctrl_io_size_stat_t_marshall(&nasd_drive_io_read_size_stats,
          pb_net->buf);
        break;

      case 40960:
        nasd_ctrl_io_size_stat_t_marshall(&nasd_drive_io_write_size_stats,
          pb_net->buf);
        break;

      case 49152:
        nasd_ctrl_layout_stat_t_marshall(&nasd_drive_layout_stats, pb_net->buf);
        break;

      case 57344:
        nasd_ctrl_ioqueue_stat_t_marshall(&nasd_drive_ioqueue_stats,
          pb_net->buf);
        break;

      default:
        NASD_ASSERT(!(offset%NASD_INFO_PAGESIZE));
        /*
         * XXX Is this the right thing to do?
         * Unknown pages come back as all zeroes rather than
         * failing the request, so a multi-page read still
         * delivers the pages we do know about. A valid page
         * starts with a non-zero identifier, so the client can
         * recognize a zeroed page as "not supported here".
         */
        bzero((char *)pb_net->buf, NASD_INFO_PAGESIZE);
        break;
    }

    /* Ship the page and account for the time spent in the pipe. */
    NASD_TM_START(&timer);
    status = byte_pipe->push(byte_pipe->state, pb_net->buf, NASD_INFO_PAGESIZE, NULL,
      NULL, NULL);
    NASD_TM_STOP_ELAP_TS(&timer,&stall);
    if (!is_read2) {
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.read_pipe_stall_time,
        &stall);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.read_bytes,
        NASD_INFO_PAGESIZE);
    }
    else {
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.read2_pipe_stall_time,
        &stall);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.read2_bytes,
        NASD_INFO_PAGESIZE);
    }
    if (status)
      break;
  }

  return(status);
}

/*
 * Serve one or more pages of the partition-info control object for
 * partition partnum.
 *
 * partnum must lie in [0, NASD_OD_MAXPARTS) (NASD_BAD_PARTITION
 * otherwise); offset and in_len must be multiples of
 * NASD_INFO_PAGESIZE (NASD_BAD_OFFSET / NASD_BAD_LEN otherwise).
 *
 * Page 0 of a partition with non-zero part_size is filled in from the
 * partition record (using pb->buf as scratch) and marshalled into
 * pb_net->buf. Every other page, and page 0 of a partition whose
 * part_size is zero, is sent as a page of zeroes. The partition's
 * in-core read lock is held for the whole transfer.
 *
 * Pipe stall time and byte counts are charged to the read or read2
 * counters of nasd_drive_cache_stats according to is_read2. *out_len
 * grows by NASD_INFO_PAGESIZE per page pushed successfully; the caller
 * is expected to have initialized it.
 *
 * Returns NASD_SUCCESS or the first failing status from the pipe.
 */
nasd_status_t
nasd_read_part_info(
  nasd_pagebuf_t           *pb,
  nasd_pagebuf_t           *pb_net,
  int                       partnum,
  nasd_offset_t             offset,
  nasd_len_t                in_len,
  int                       is_read2,
  nasd_procpipe_t          *byte_pipe,
  nasd_len_t               *out_len)
{
  nasd_ctrl_part_info_t *info;
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_otw_base_t *buf;
  nasd_timespec_t ts;
  nasd_status_t rc;
  nasd_timer_t tm;

  /* partnum is signed: reject negatives as well as too-large values */
  if ((partnum < 0) || (partnum >= NASD_OD_MAXPARTS))
    return(NASD_BAD_PARTITION);

  /*
   * Read pagesized chunks on pagealigned boundaries.
   * In other words, whole pages only, please.
   */
  if (in_len % NASD_INFO_PAGESIZE)
    return(NASD_BAD_LEN);
  if (offset % NASD_INFO_PAGESIZE)
    return(NASD_BAD_OFFSET);


  rc = NASD_SUCCESS;
  buf = pb->buf;

  icp = &nasd_odc_state->parts[partnum];
  part = &PART(partnum);

  NASD_ODC_ICPART_LOCK_READ(icp);

  /*
   * A partition with part_size == 0 is deliberately not rejected with
   * NASD_BAD_PARTITION here; it gets a page of zeroes instead (see the
   * zeroblk path below).
   */

  while((*out_len) < in_len) {
    switch(offset) {
      case 0:
        if (part->part_size == 0)
          goto zeroblk;
        info = (nasd_ctrl_part_info_t *)buf;
        info->ctrl_id = NASD_CTRL_PART_INFO;
        info->first_obj = part->first_obj;
        info->num_obj = part->num_obj;
        info->part_size = part->part_size;
        info->blocks_used = part->blocks_used;
        info->blocks_allocated = part->blocks_allocated;
        /* XXX mark this infinite once we support it */
        info->max_objs = nasd_odc_state->npt_sz * NASD_OD_NODES_PER_NPT_BLOCK;
        info->blocksize = NASD_OD_BASIC_BLOCKSIZE;
        info->min_protection = part->min_protection;
        nasd_ctrl_part_info_t_marshall(info, pb_net->buf);
        break;
      default:
zeroblk:
        NASD_ASSERT(!(offset%NASD_INFO_PAGESIZE));
        /*
         * XXX Is this the right thing to do?
         * Here, we'll send back a page full of
         * zeroes. Why? Well, if the read is for
         * multiple info pages, then we don't want
         * to fail the good parts by returning
         * NASD_BAD_OFFSET or something. However,
         * a "good" page will have an identifier at
         * the top that we'll have zeroed, so the
         * client can tell we didn't know what it
         * was talking about, in case it's asking
         * for something that we don't support but
         * other drives do. As if there were any
         * other drives.
         */
        bzero((char *)pb_net->buf, NASD_INFO_PAGESIZE);
    }
    /* ship the page and account for the time spent in the pipe */
    NASD_TM_START(&tm);
    rc = byte_pipe->push(byte_pipe->state, pb_net->buf, NASD_INFO_PAGESIZE, NULL,
      NULL, NULL);
    NASD_TM_STOP_ELAP_TS(&tm,&ts);
    if (is_read2) {
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.read2_pipe_stall_time,
        &ts);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.read2_bytes,
        NASD_INFO_PAGESIZE);
    }
    else {
      NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.read_pipe_stall_time,
        &ts);
      NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.read_bytes,
        NASD_INFO_PAGESIZE);
    }
    if (rc)
      break;

    *out_len += NASD_INFO_PAGESIZE;
    offset += NASD_INFO_PAGESIZE;

  }

  NASD_ODC_ICPART_UNLOCK_READ(icp);

  return(rc);
}


/*
 * Stream the identifiers of the objects in partition partnum.
 *
 * The object list is presented as an array of marshalled
 * nasd_identifier_t values; offset is the byte position in that array
 * at which to start and in_len the number of bytes wanted. in_len must
 * be a multiple of sizeof(nasd_identifier_t) (NASD_BAD_LEN otherwise)
 * and partnum must name a valid partition (NASD_BAD_PARTITION
 * otherwise).
 *
 * The list is produced by sweeping the node pagetable (NPT) blocks and
 * emitting every occupied slot whose partition bits match partnum. The
 * position reached is remembered in the in-core partition record
 * (last_objlist_*), so a follow-up read that starts where the previous
 * one stopped, with no create/delete in between, resumes the sweep
 * instead of starting over.
 *
 * Pipe stall time and byte counts are charged to the read or read2
 * counters of nasd_drive_cache_stats according to is_read2. The number
 * of bytes pushed successfully is added to *out_lenp.
 *
 * Returns NASD_SUCCESS, NASD_BAD_LEN, NASD_BAD_PARTITION, NASD_FAIL if
 * an NPT block could not be obtained, or the failing status from the
 * pipe.
 *
 * NOTE(review): the last_objlist_* fields are written while only the
 * partition *read* lock is held -- confirm that concurrent readers of
 * the same partition cannot race here.
 */
nasd_status_t
nasd_read_part_objs(
  int                       partnum,
  nasd_offset_t             offset,
  nasd_len_t                in_len,
  int                       is_read2,
  nasd_procpipe_t          *byte_pipe,
  nasd_len_t               *out_lenp)
{
  int i, keep_scanning, pn, fin;
  nasd_blkno_t npt1_blk, npt2_blk;
  nasd_identifier_otw_t nid_otw;
  nasd_timespec_t compl_time;
  nasd_nodenum_t nodenum;
  nasd_odc_icpart_t *icp;
  nasd_identifier_t nid;
  nasd_blkcnt_t npt_len;
  nasd_offset_t cur_off;
  nasd_od_part_t *part;
  nasd_odc_ent_t *npte;
  nasd_od_pte_t *ptes;
  nasd_len_t out_len;
  nasd_timespec_t ts;
  nasd_status_t rc;
  nasd_timer_t tm;

  if (in_len % sizeof(nasd_identifier_t))
    return(NASD_BAD_LEN);

  /* partnum indexes two arrays below; range-check it first */
  if ((partnum < 0) || (partnum >= NASD_OD_MAXPARTS))
    return(NASD_BAD_PARTITION);

  part = &PART(partnum);
  icp = &nasd_odc_state->parts[partnum];

  NASD_ODC_ICPART_LOCK_READ(icp);
  if (NASD_OD_INVALID_PART(part)) {
    NASD_ODC_ICPART_UNLOCK_READ(icp);
    return(NASD_BAD_PARTITION);
  }

  out_len = 0;
  npt_len = nasd_odc_state->disk->npt_ext.last
    - nasd_odc_state->disk->npt_ext.first + 1;
  rc = NASD_SUCCESS;
  npt1_blk = nasd_odc_state->disk->npt_ext.first;
  i = 0;

  /*
   * Snapshot of the partition's last create/delete time; stored at
   * done_scan as the stamp this object list is valid for. It has to be
   * set before the resume logic, because the early exits to done_scan
   * below store it too.
   */
  compl_time = part->last_cr_del;

  /*
   * See if we're resuming an old read sequence (in which case,
   * we take an optimized path to avoid rescanning old NPT blocks
   * if they haven't changed), or if we're starting a new one.
   */
  if (offset && (icp->last_objlist_off == offset)
    && NASD_TIMESPEC_EQ(part->last_cr_del, icp->last_objlist_get))
  {
    /*
     * Resume the old scan
     */
    fin = icp->last_objlist_fin;
    cur_off = icp->last_objlist_off;
    if (fin) {
      /* already completed */
      goto done_scan;
    }
    npt1_blk = icp->last_objlist_npt;
    /*
     * NOTE(review): this assumes the secondary NPT extent starts
     * exactly npt_len blocks after the primary one; the fresh-scan
     * path uses npt2_ext.first instead -- confirm the layout
     * guarantees the two agree.
     */
    npt2_blk = npt1_blk + npt_len;
    i = icp->last_objlist_ind;
    if (i >= NASD_OD_NODES_PER_NPT_BLOCK) {
      /* previous read stopped at the end of a block; step to the next */
      i = 0;
      npt1_blk++;
      npt2_blk++;
    }
    if (npt1_blk > nasd_odc_state->disk->npt_ext.last) {
      fin = 1;
      goto done_scan;
    }
  }
  else {
    /*
     * Start a new scan
     */
    fin = 0;
    cur_off = 0;
    npt1_blk = nasd_odc_state->disk->npt_ext.first;
    npt2_blk = nasd_odc_state->disk->npt2_ext.first;
    i = 0;
  }

  /*
   * cur_off = byte offset in the object list of the next matching object
   * i       = slot in the current npt block being examined
   */
  keep_scanning = 1;
  do {
    NASD_ASSERT(npt1_blk <= nasd_odc_state->disk->npt_ext.last);
    NASD_ASSERT(npt2_blk <= nasd_odc_state->disk->npt2_ext.last);

    rc = nasd_odc_block_get(NULL, npt2_blk, 0, &npte,
      NASD_ID_NULL, 0, NASD_ODC_T_NPT2, NULL);
    if (rc != NASD_SUCCESS) {
      /*
       * Secondary copy not in-core. Get primary copy. If it's
       * not already here, read it in.
       */
      rc = nasd_odc_block_get(NULL, npt1_blk,
        NASD_ODC_L_FORCE|NASD_ODC_L_BLOCK|NASD_ODC_L_LOAD, &npte,
        NASD_ID_NULL, 0, NASD_ODC_T_NPT1, NULL);
      if (rc != NASD_SUCCESS) {
        /* try to force loading secondary block */
        rc = nasd_odc_block_get(NULL, npt2_blk,
          NASD_ODC_L_FORCE|NASD_ODC_L_BLOCK|NASD_ODC_L_LOAD, &npte,
          NASD_ID_NULL, 0, NASD_ODC_T_NPT2, NULL);
      }
    }
    if (rc != NASD_SUCCESS) {
      /*
       * For some reason, we couldn't load either the primary or
       * secondary NPT block. Houston, we have a problem.
       */
      /* zero the stamp so the next read cannot resume from here */
      compl_time.ts_sec = 0;
      compl_time.ts_nsec = 0;
      rc = NASD_FAIL;
      goto done_scan;
    }

    NASD_ODC_LOCK_BLOCK(npte);
    nasd_odc_wait_not_busy_invalid(npte);
    NASD_ODC_UNLOCK_BLOCK(npte);
    ptes = npte->data.pte;

    /*
     * Sweep the NPT block for valid objects in our partition.
     */
    NASD_ODC_RLOCK_BLOCK_DATA(npte);
    /* i initted either above or reinitted right after this loop */
    for(;((i<NASD_OD_NODES_PER_NPT_BLOCK)&&keep_scanning);i++) {
      if (ptes[i].blkno) {
        /* A valid object exists in this slot */
        pn = ptes[i].highbits & 0x01e00000;
        pn >>= 21;
        if (pn == partnum) {
          /* The object is in our partition */
          if (cur_off >= offset) {
            /* compute object id */
            nodenum = npt1_blk - nasd_odc_state->disk->npt_ext.first;
            nodenum *= NASD_OD_NODES_PER_NPT_BLOCK;
            nodenum += i;
            nodenum += nasd_reserved_nodes;
            nid = ptes[i].highbits;
            nid <<= 32;
            nid |= nodenum;
            /* ...marshall it */
            nasd_identifier_t_marshall(&nid, nid_otw);
            /* ...and send it */
            NASD_TM_START(&tm);
            rc = byte_pipe->push(byte_pipe->state, nid_otw,
              sizeof(nasd_identifier_t), NULL, NULL, NULL);
            NASD_TM_STOP_ELAP_TS(&tm,&ts);
            if (is_read2) {
              NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.read2_pipe_stall_time, &ts);
              NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.read2_bytes,
                sizeof(nasd_identifier_t));
            }
            else {
              NASD_ATOMIC_TIMESPEC_ADD(&nasd_drive_cache_stats.read_pipe_stall_time, &ts);
              NASD_ATOMIC_ADD64(&nasd_drive_cache_stats.read_bytes,
                sizeof(nasd_identifier_t));
            }
            if (rc)
              break;

            out_len += sizeof(nasd_identifier_t);
          }
          cur_off += sizeof(nasd_identifier_t);
          if (out_len >= in_len) {
            keep_scanning = 0;
          }
        }
      }
    }

    /*
     * Always drop the data lock and our reference on the block, even
     * when the push failed; bailing out first would leak both.
     */
    NASD_ODC_RUNLOCK_BLOCK_DATA(npte);
    nasd_odc_block_release(npte);

    if (rc)
      break;

    if (out_len >= in_len) {
      keep_scanning = 0;
    }
    if (keep_scanning) {
      npt1_blk++;
      npt2_blk++;
      i = 0; /* for next time around */
      if (npt1_blk > nasd_odc_state->disk->npt_ext.last) {
        keep_scanning = 0;
      }
    }
  } while(keep_scanning);

done_scan:

  *out_lenp += out_len;

  /* remember where we stopped so the next read can pick up from here */
  icp->last_objlist_get = compl_time;
  icp->last_objlist_fin = fin;
  icp->last_objlist_off = cur_off;
  icp->last_objlist_npt = npt1_blk;
  icp->last_objlist_ind = i;

  NASD_ODC_ICPART_UNLOCK_READ(icp);

  return(rc);
}


/*
 * Read from a control object.
 *
 * Dispatches on ctrl_node to the reader for that control object.
 * Drive info, partition info, trace info and trace data readers are
 * handed two scratch page buffers taken from nasd_free_pagebuf; the
 * partition object list reader needs none. *out_len is reset to zero
 * before dispatching.
 *
 * Returns the reader's status, NASD_NO_MEM if a scratch buffer could
 * not be obtained, or NASD_BAD_IDENTIFIER for an unknown ctrl_node.
 */
nasd_status_t
nasd_obj_control_read_simple(
  int                       partnum,
  nasd_nodenum_t            ctrl_node,
  nasd_offset_t             offset,
  int                       is_read2,
  nasd_len_t                in_len,
  nasd_procpipe_t          *byte_pipe,
  nasd_len_t               *out_len)
{
  nasd_pagebuf_t *page = NULL;
  nasd_pagebuf_t *page_net = NULL;
  nasd_status_t status = NASD_SUCCESS;
  int need_bufs;

  *out_len = 0;

  /* First pass: is this a node we know, and does it want buffers? */
  switch(ctrl_node) {
    case NASD_CTRL_DRIVE_INFO:
    case NASD_CTRL_PART_INFO:
    case NASD_CTRL_TRACE_INFO:
    case NASD_CTRL_TRACE_DATA:
      need_bufs = 1;
      break;
    case NASD_CTRL_PART_OBJS:
      need_bufs = 0;
      break;
    default:
      /* nothing has been allocated yet, so there is nothing to free */
      return(NASD_BAD_IDENTIFIER);
  }

  if (need_bufs) {
    NASD_FREELIST_GET(nasd_free_pagebuf,page,next,(nasd_pagebuf_t *));
    if (page == NULL) {
      status = NASD_NO_MEM;
      goto cleanup;
    }
    NASD_FREELIST_GET(nasd_free_pagebuf,page_net,next,(nasd_pagebuf_t *));
    if (page_net == NULL) {
      status = NASD_NO_MEM;
      goto cleanup;
    }
  }

  /* Second pass: hand off to the reader for this node. */
  switch(ctrl_node) {
    case NASD_CTRL_DRIVE_INFO:
      status = nasd_read_drive_info(page, page_net, partnum, offset, in_len,
        is_read2, byte_pipe, out_len);
      break;
    case NASD_CTRL_PART_INFO:
      status = nasd_read_part_info(page, page_net, partnum, offset, in_len,
        is_read2, byte_pipe, out_len);
      break;
    case NASD_CTRL_TRACE_INFO:
      status = nasd_dt_read_trace_info(page, page_net, partnum, offset, in_len,
        is_read2, byte_pipe, out_len);
      break;
    case NASD_CTRL_TRACE_DATA:
      status = nasd_dt_read_trace_data(page, page_net, partnum, offset, in_len,
        is_read2, byte_pipe, out_len);
      break;
    case NASD_CTRL_PART_OBJS:
      status = nasd_read_part_objs(partnum, offset, in_len,
        is_read2, byte_pipe, out_len);
      break;
    default:
      /* unreachable: unknown nodes were rejected above */
      break;
  }

cleanup:
  if (page) {
    NASD_FREELIST_FREE(nasd_free_pagebuf,page,next);
  }
  if (page_net) {
    NASD_FREELIST_FREE(nasd_free_pagebuf,page_net,next);
  }
  return(status);
}



/*
 * Write to a control object.
 *
 * Resets *out_len to zero and dispatches on ctrl_node to the writer
 * for that control object. The partition object list is not writable
 * and yields NASD_NOT_ON_CONTROL; an unknown ctrl_node yields
 * NASD_BAD_IDENTIFIER. Otherwise the writer's status is returned.
 */
nasd_status_t
nasd_obj_control_write_simple(
  int                partnum,
  nasd_nodenum_t     ctrl_node,
  nasd_offset_t      offset,
  nasd_len_t         len,
  nasd_procpipe_t   *byte_pipe,
  nasd_len_t        *out_len)
{
  *out_len = 0;

  switch(ctrl_node) {
    case NASD_CTRL_DRIVE_INFO:
      return(nasd_write_drive_info(partnum, offset, len, byte_pipe, out_len));

    case NASD_CTRL_TRACE_INFO:
      return(nasd_dt_write_trace_info(partnum, offset, len, byte_pipe,
        out_len));

    case NASD_CTRL_TRACE_DATA:
      return(nasd_dt_write_trace_data(partnum, offset, len, byte_pipe,
        out_len));

    case NASD_CTRL_PART_INFO:
      return(nasd_write_part_info(partnum, offset, len, byte_pipe, out_len));

    case NASD_CTRL_PART_OBJS:
      /* read-only control object */
      return(NASD_NOT_ON_CONTROL);

    default:
      break;
  }

  return(NASD_BAD_IDENTIFIER);
}


/*
 * Synthesize attributes for a control object.
 *
 * *attrp is zeroed first and then filled in according to ctrl_node:
 *
 *   NASD_CTRL_DRIVE_INFO  one info page long; times taken from the
 *                         disk header under the disk lock.
 *   NASD_CTRL_PART_INFO   one info page long; times taken from the
 *                         partition record.
 *   NASD_CTRL_PART_OBJS   num_obj identifiers long; object modify
 *                         times are the partition's last create/delete
 *                         time.
 *
 * For the two partition objects partnum must lie in
 * [0, NASD_OD_MAXPARTS) and name a valid partition; validity is
 * checked with the partition's in-core read lock held.
 *
 * Returns NASD_SUCCESS, NASD_BAD_PARTITION, or NASD_BAD_IDENTIFIER for
 * any other ctrl_node. On failure *attrp is left zeroed.
 */
nasd_status_t
nasd_obj_control_getattr(
  int                partnum,
  nasd_nodenum_t     ctrl_node,
  nasd_attribute_t  *attrp)
{
  nasd_odc_icpart_t *icp;
  nasd_od_part_t *part;
  nasd_status_t rc;

  bzero((char *)attrp, sizeof(nasd_attribute_t));
  rc = NASD_FAIL;
  switch(ctrl_node) {
    case NASD_CTRL_DRIVE_INFO:
      attrp->object_len = 1 * NASD_INFO_PAGESIZE;
      attrp->block_size = NASD_INFO_PAGESIZE;
      NASD_ODC_LOCK_DISK();
      attrp->attr_modify_time = nasd_odc_state->disk->mod_time;
      attrp->object_modify_time = nasd_odc_state->disk->mod_time;
      attrp->object_create_time = nasd_odc_state->disk->format_time;
      attrp->fs_attr_modify_time = nasd_odc_state->disk->mod_time;
      attrp->fs_object_modify_time = nasd_odc_state->disk->mod_time;
      NASD_ODC_UNLOCK_DISK();
      rc = NASD_SUCCESS;
      break;
    case NASD_CTRL_PART_INFO:
      /* partnum indexes two arrays below; range-check it first */
      if ((partnum < 0) || (partnum >= NASD_OD_MAXPARTS)) {
        rc = NASD_BAD_PARTITION;
        break;
      }
      part = &PART(partnum);
      icp = &nasd_odc_state->parts[partnum];
      /* examine the partition only with its read lock held */
      NASD_ODC_ICPART_LOCK_READ(icp);
      if (NASD_OD_INVALID_PART(part)) {
        NASD_ODC_ICPART_UNLOCK_READ(icp);
        rc = NASD_BAD_PARTITION;
        break;
      }
      attrp->object_len = 1 * NASD_INFO_PAGESIZE;
      attrp->block_size = NASD_INFO_PAGESIZE;
      attrp->attr_modify_time = part->mod_time;
      attrp->object_modify_time = part->mod_time;
      attrp->object_create_time = part->create_time;
      attrp->fs_attr_modify_time = part->mod_time;
      attrp->fs_object_modify_time = part->mod_time;
      NASD_ODC_ICPART_UNLOCK_READ(icp);
      rc = NASD_SUCCESS;
      break;
    case NASD_CTRL_PART_OBJS:
      /* partnum indexes two arrays below; range-check it first */
      if ((partnum < 0) || (partnum >= NASD_OD_MAXPARTS)) {
        rc = NASD_BAD_PARTITION;
        break;
      }
      part = &PART(partnum);
      icp = &nasd_odc_state->parts[partnum];
      /* examine the partition only with its read lock held */
      NASD_ODC_ICPART_LOCK_READ(icp);
      if (NASD_OD_INVALID_PART(part)) {
        NASD_ODC_ICPART_UNLOCK_READ(icp);
        rc = NASD_BAD_PARTITION;
        break;
      }
      attrp->object_len = part->num_obj * sizeof(nasd_identifier_t);
      attrp->block_size = NASD_INFO_PAGESIZE;
      attrp->attr_modify_time = part->mod_time;
      attrp->object_modify_time = part->last_cr_del;
      attrp->object_create_time = part->create_time;
      attrp->fs_attr_modify_time = part->mod_time;
      attrp->fs_object_modify_time = part->last_cr_del;
      NASD_ODC_ICPART_UNLOCK_READ(icp);
      rc = NASD_SUCCESS;
      break;
    default:
      rc = NASD_BAD_IDENTIFIER;
  }

  return(rc);
}

/* Local Variables:  */
/* indent-tabs-mode: nil */
/* tab-width: 2 */
/* End: */
