/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Simulated network device (tap) driver: implements a pseudo GLDv3
 * network device.
 */

#include <sys/policy.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/priv_names.h>
#include <sys/dlpi.h>
#include <net/tap.h>
#include <sys/ethernet.h>
#include <sys/mac.h>
#include <sys/dls.h>
#include <sys/mac_ether.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_priv.h>
#include <sys/vlan.h>
#include <sys/random.h>
#include <sys/sysmacros.h>
#include <sys/list.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/atomic.h>
#include <sys/mac_impl.h>
#include <sys/thread.h>
#include <sys/synch.h>
#include <sys/sunddi.h>

#include "tap_impl.h"

/* Short description string stored in tap_modldrv. */
#define	TAPINFO		"TAP Network Driver"

/* devinfo node recorded by tap_attach() and cleared by tap_detach(). */
static dev_info_t *tap_dip;
/* Task queue, created in tap_init(), that tap_m_tx() dispatches tap_rx() on. */
static ddi_taskq_t *tap_rxq;

/* Driver entry points, ioctl handlers and helpers defined later in the file. */
static int tap_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int tap_attach(dev_info_t *, ddi_attach_cmd_t);
static int tap_detach(dev_info_t *, ddi_detach_cmd_t);
static int tap_ioc_create(void *, intptr_t, int, cred_t *, int *);
static int tap_ioc_delete(void *, intptr_t, int, cred_t *, int *);
static int tap_ioc_info(void *, intptr_t, int, cred_t *, int *);
static int tap_ioc_modify(void *, intptr_t, int, cred_t *, int *);
static uint8_t *mcastaddr_lookup(tap_dev_t *, const uint8_t *);

/*
 * ioctl table handed to dld_ioc_register() in tap_attach().  Each entry
 * names the ioctl command, its copy flags, the size of its argument
 * structure, the handler function and a final routine (secpolicy_dl_config
 * or NULL) -- presumably the privilege check applied before the handler
 * runs; confirm against dld_ioc_info_t.  TAP_IOC_INFO is the only entry
 * that passes NULL for it.
 */
static dld_ioc_info_t tap_ioc_list[] = {
	{TAP_IOC_CREATE, DLDCOPYINOUT, sizeof (tap_ioc_create_t),
	    tap_ioc_create, secpolicy_dl_config},
	{TAP_IOC_DELETE, DLDCOPYIN, sizeof (tap_ioc_delete_t),
	    tap_ioc_delete, secpolicy_dl_config},
	{TAP_IOC_INFO, DLDCOPYINOUT, sizeof (tap_ioc_info_t),
	    tap_ioc_info, NULL},
	{TAP_IOC_MODIFY, DLDCOPYIN, sizeof (tap_ioc_modify_t),
	    tap_ioc_modify, secpolicy_dl_config}
};

/*
 * Device operations for the driver: tap_attach/tap_detach/tap_getinfo are
 * the only non-default entry points supplied, and quiesce is declared as
 * not supported.
 */
DDI_DEFINE_STREAM_OPS(tap_dev_ops, nulldev, nulldev, tap_attach,
    tap_detach, nodev, tap_getinfo, D_MP, NULL,
    ddi_quiesce_not_supported);

/* Loadable-driver linkage record referencing tap_dev_ops. */
static struct modldrv tap_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	TAPINFO,		/* short description */
	&tap_dev_ops		/* driver specific ops */
};

/* Module linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, &tap_modldrv, NULL
};

/* MAC callback function declarations */
static int tap_m_start(void *);
static void tap_m_stop(void *);
static int tap_m_promisc(void *, boolean_t);
static int tap_m_multicst(void *, boolean_t, const uint8_t *);
static int tap_m_unicst(void *, const uint8_t *);
static int tap_m_stat(void *, uint_t, uint64_t *);
static void tap_m_ioctl(void *, queue_t *, mblk_t *);
static mblk_t *tap_m_tx(void *, mblk_t *);
static int tap_m_setprop(void *, const char *, mac_prop_id_t,
    uint_t, const void *);

/*
 * Callback vector registered for every tap instance (see tap_init_mac()).
 * The initializer is positional, so the entries must stay in the member
 * order of mac_callbacks_t (declared outside this file).  The first entry
 * is the flag mask; only MC_IOCTL and MC_SETPROP are set.
 *
 * NOTE(review): tap_m_getprop() is defined in this file but is neither
 * listed here nor enabled through the flag mask, so it appears to be
 * unused -- confirm.  The definition of tap_m_ioctl() was not visible in
 * the reviewed portion of the file -- confirm it exists.
 */
static mac_callbacks_t tap_m_callbacks = {
	(MC_IOCTL | MC_SETPROP),
	tap_m_stat,
	tap_m_start,
	tap_m_stop,
	tap_m_promisc,
	tap_m_multicst,
	tap_m_unicst,
	tap_m_tx,
	NULL,
	tap_m_ioctl,
	NULL,
	NULL,
	NULL,
	tap_m_setprop,
	NULL,
	NULL
};

/*
 * tap_dev_lock protects the tap device list.
 * sd_instlock in each tap_dev_t protects access to
 * a single tap_dev_t.
 *
 * tap_count is incremented in tap_ioc_create() and decremented in
 * tap_dev_unref() when an instance is freed; tap_detach() refuses to
 * detach while it is non-zero.
 */
static krwlock_t	tap_dev_lock;
static list_t		tap_dev_list;
static int		tap_count; /* Num of tap instances */

/*
 * Module load entry point: prepares tap_dev_ops for use as a MAC driver
 * and installs the module.  If installation fails the MAC ops setup is
 * undone.  Returns the mod_install() result.
 */
int
_init(void)
{
	int	rc;

	mac_init_ops(&tap_dev_ops, "tap");

	rc = mod_install(&modlinkage);
	if (rc == DDI_SUCCESS)
		return (rc);

	/* Installation failed: roll back mac_init_ops(). */
	mac_fini_ops(&tap_dev_ops);
	return (rc);
}

/*
 * Module unload entry point: removes the module and, only when removal
 * succeeded, tears down the MAC ops set up by _init().  Returns the
 * mod_remove() result.
 */
int
_fini(void)
{
	int	rc;

	rc = mod_remove(&modlinkage);
	if (rc != DDI_SUCCESS)
		return (rc);

	mac_fini_ops(&tap_dev_ops);
	return (rc);
}

/* Module information entry point: forwards the query to mod_info(). */
int
_info(struct modinfo *modinfop)
{
	int	rc;

	rc = mod_info(&modlinkage, modinfop);
	return (rc);
}

/*
 * Creates the receive task queue and initializes the global device list
 * and its lock.  Returns B_FALSE, with nothing else initialized, when the
 * task queue cannot be created; B_TRUE otherwise.
 */
static boolean_t
tap_init(void)
{
	tap_rxq = ddi_taskq_create(tap_dip, "tap", 1, TASKQ_DEFAULTPRI, 0);
	if (tap_rxq == NULL)
		return (B_FALSE);

	list_create(&tap_dev_list, sizeof (tap_dev_t),
	    offsetof(tap_dev_t, sd_listnode));
	rw_init(&tap_dev_lock, NULL, RW_DEFAULT, NULL);

	return (B_TRUE);
}

/*
 * Releases the global state set up by tap_init(): the device list lock,
 * the device list and the receive task queue.  No tap instance may
 * remain (asserted).
 */
static void
tap_fini(void)
{
	ASSERT(tap_count == 0);

	rw_destroy(&tap_dev_lock);
	list_destroy(&tap_dev_list);
	ddi_taskq_destroy(tap_rxq);
}

/*
 * getinfo(9E) entry point.  DDI_INFO_DEVT2DEVINFO yields the recorded
 * devinfo node and DDI_INFO_DEVT2INSTANCE yields NULL (instance 0); any
 * other command fails.
 */
/*ARGSUSED*/
static int
tap_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result)
{
	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		*result = tap_dip;
		return (DDI_SUCCESS);
	}

	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
		*result = NULL;
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}

/*
 * attach(9E) entry point.
 *
 * DDI_ATTACH: only instance 0 may attach.  Registers the tap ioctl table
 * with dld, records the devinfo node and initializes the global driver
 * state.  If that initialization fails, the ioctl registration and the
 * recorded devinfo node are rolled back so a failed attach leaves nothing
 * behind.
 * DDI_RESUME: nothing to do.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
tap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_ATTACH:
		if (ddi_get_instance(dip) != 0) {
			/* we only allow instance 0 to attach */
			return (DDI_FAILURE);
		}

		if (dld_ioc_register(TAP_IOC, tap_ioc_list,
		    DLDIOCCNT(tap_ioc_list)) != 0)
			return (DDI_FAILURE);

		/* tap_init() needs tap_dip to create the task queue. */
		tap_dip = dip;
		if (!tap_init()) {
			/* Undo the steps above; tap_detach() will not run. */
			tap_dip = NULL;
			dld_ioc_unregister(TAP_IOC);
			return (DDI_FAILURE);
		}
		return (DDI_SUCCESS);

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * detach(9E) entry point.  DDI_SUSPEND always succeeds.  DDI_DETACH is
 * refused while any tap instance exists; otherwise the ioctl table is
 * unregistered and the global driver state is torn down.  Any other
 * command fails.
 */
/*ARGSUSED*/
static int
tap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/* Detaching is only permitted when no taps are configured. */
	if (tap_count > 0)
		return (DDI_FAILURE);

	dld_ioc_unregister(TAP_IOC);
	tap_fini();
	tap_dip = NULL;
	return (DDI_SUCCESS);
}

/*
 * Finds the tap instance with the given link id on the global device
 * list, skipping instances flagged SDF_SHUTDOWN.  On a match the
 * instance's reference count is incremented and the instance returned;
 * the caller drops that reference with tap_dev_unref().  Returns NULL
 * when there is no match.
 *
 * Caller must hold tap_dev_lock.
 */
static tap_dev_t *
tap_dev_lookup(datalink_id_t link_id)
{
	tap_dev_t *cur;

	ASSERT(RW_LOCK_HELD(&tap_dev_lock));

	cur = list_head(&tap_dev_list);
	while (cur != NULL) {
		if (cur->sd_link_id == link_id &&
		    !(cur->sd_flags & SDF_SHUTDOWN)) {
			atomic_inc_32(&cur->sd_refcount);
			return (cur);
		}
		cur = list_next(&tap_dev_list, cur);
	}

	return (NULL);
}

/*
 * Drops one reference on a tap instance.  When the last reference goes
 * away the MAC (if registered) is unregistered, the instance lock,
 * condition variable and multicast address array are released, the
 * instance itself is freed and tap_count is decremented.
 */
static void
tap_dev_unref(tap_dev_t *sdev)
{
	ASSERT(sdev->sd_refcount > 0);

	if (atomic_dec_32_nv(&sdev->sd_refcount) == 0) {
		/* Last reference: tear the instance down. */
		if (sdev->sd_mh != NULL)
			(void) mac_unregister(sdev->sd_mh);

		mutex_destroy(&sdev->sd_instlock);
		cv_destroy(&sdev->sd_threadwait);
		kmem_free(sdev->sd_mcastaddrs,
		    ETHERADDRL * sdev->sd_mcastaddr_count);
		kmem_free(sdev, sizeof (*sdev));
		tap_count--;
	}
}

/*
 * Completes an Ethernet registration record (plugin identifier, maximum
 * SDU of TAP_MAX_MTU, VLAN tag margin) and registers the MAC, storing
 * the handle in sdev->sd_mh.  Returns the mac_register() result.
 */
static int
tap_init_ether(tap_dev_t *sdev, mac_register_t *mac)
{
	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_max_sdu = TAP_MAX_MTU;
	mac->m_margin = VLAN_TAGSZ;

	return (mac_register(mac, &sdev->sd_mh));
}

/*
 * Allocates a MAC registration record for the instance, fills in the
 * fields common to all media types and registers the MAC.  Only DL_ETHER
 * instances are supported; any other type yields EINVAL.  The
 * registration record is freed before returning.
 *
 * Returns 0 on success, ENOMEM if the record cannot be allocated, EINVAL
 * for an unsupported type, or the error from registration.
 */
static int
tap_init_mac(tap_dev_t *sdev)
{
	mac_register_t *reg;
	int rc;

	reg = mac_alloc(MAC_VERSION);
	if (reg == NULL)
		return (ENOMEM);

	reg->m_driver = sdev;
	reg->m_dip = tap_dip;
	reg->m_instance = (uint_t)-1;
	reg->m_src_addr = sdev->sd_mac_addr;
	reg->m_callbacks = &tap_m_callbacks;
	reg->m_min_sdu = 0;

	rc = (sdev->sd_type == DL_ETHER) ? tap_init_ether(sdev, reg) : EINVAL;

	mac_free(reg);
	return (rc);
}

/*
 * TAP_IOC_CREATE handler: creates a new tap instance for the link id in
 * the request.
 *
 * karg points to a tap_ioc_create_t.  If sic_mac_len is zero a random,
 * locally administered unicast MAC address is generated; otherwise the
 * supplied address is used.  The supplied length is validated against
 * both the request buffer and the instance's address buffer before it is
 * used as a copy length.  On success the MAC address in use is written
 * back into the request.
 *
 * Returns 0 on success, EINVAL for an out-of-range sic_mac_len, ENOMEM
 * if the instance cannot be allocated, EEXIST if the link id is already
 * in use, or the error from MAC registration / devnet creation.
 */
/* ARGSUSED */
static int
tap_ioc_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
	tap_ioc_create_t *create_arg = karg;
	tap_dev_t *sdev;
	tap_dev_t *sdev_tmp;
	int err = 0;

	/*
	 * sic_mac_len comes from the caller and is used below as a memcpy
	 * length, so it must fit both the source and destination buffers.
	 * (sizeof does not evaluate its operand, so sdev is not read here.)
	 */
	if (create_arg->sic_mac_len > sizeof (sdev->sd_mac_addr) ||
	    create_arg->sic_mac_len > sizeof (create_arg->sic_mac_addr))
		return (EINVAL);

	sdev = kmem_zalloc(sizeof (*sdev), KM_NOSLEEP);
	if (sdev == NULL)
		return (ENOMEM);

	rw_enter(&tap_dev_lock, RW_WRITER);
	if ((sdev_tmp = tap_dev_lookup(create_arg->sic_link_id)) != NULL) {
		/* Link id already in use; drop the lookup reference. */
		tap_dev_unref(sdev_tmp);
		rw_exit(&tap_dev_lock);
		kmem_free(sdev, sizeof (*sdev));
		return (EEXIST);
	}

	sdev->sd_type = create_arg->sic_type;
	sdev->sd_link_id = create_arg->sic_link_id;
	sdev->sd_zoneid = crgetzoneid(cred);
	/* Initial reference; dropped by tap_ioc_delete() or on error below. */
	sdev->sd_refcount++;
	mutex_init(&sdev->sd_instlock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&sdev->sd_threadwait, NULL, CV_DRIVER, NULL);
	tap_count++;

	/* A caller may pass in a MAC address to use (sic_mac_len != 0) */
	if (create_arg->sic_mac_len == 0) {
		/* Generate random MAC address */
		(void) random_get_pseudo_bytes(sdev->sd_mac_addr, ETHERADDRL);
		/* Ensure MAC address is not multicast and is local */
		sdev->sd_mac_addr[0] = (sdev->sd_mac_addr[0] & ~1) | 2;
		sdev->sd_mac_len = ETHERADDRL;
	} else {
		(void) memcpy(sdev->sd_mac_addr, create_arg->sic_mac_addr,
		    create_arg->sic_mac_len);
		sdev->sd_mac_len = create_arg->sic_mac_len;
	}

	/* On failure the unref frees sdev and decrements tap_count. */
	if ((err = tap_init_mac(sdev)) != 0) {
		tap_dev_unref(sdev);
		goto exit;
	}

	if ((err = dls_devnet_create(sdev->sd_mh, sdev->sd_link_id,
	    crgetzoneid(cred))) != 0) {
		tap_dev_unref(sdev);
		goto exit;
	}

	mac_link_update(sdev->sd_mh, LINK_STATE_UP);
	mac_tx_update(sdev->sd_mh);
	list_insert_tail(&tap_dev_list, sdev);

	/* Always return MAC address back to caller */
	(void) memcpy(create_arg->sic_mac_addr, sdev->sd_mac_addr,
	    sdev->sd_mac_len);
	create_arg->sic_mac_len = sdev->sd_mac_len;
exit:
	rw_exit(&tap_dev_lock);
	return (err);
}

/*
 * Detaches sdev from its peer, if it has one.  Both devices' peer
 * pointers are cleared and the reference each held because of the
 * pairing is released.  Returns the former peer's link id, or
 * DATALINK_INVALID_LINKID if sdev had no peer.
 *
 * Caller must hold tap_dev_lock as writer.
 */
static datalink_id_t
tap_remove_peer(tap_dev_t *sdev)
{
	tap_dev_t *other;
	datalink_id_t other_id;

	ASSERT(RW_WRITE_HELD(&tap_dev_lock));

	other = sdev->sd_peer_dev;
	if (other == NULL)
		return (DATALINK_INVALID_LINKID);

	ASSERT(sdev == other->sd_peer_dev);
	other->sd_peer_dev = NULL;
	sdev->sd_peer_dev = NULL;
	other_id = other->sd_link_id;

	/* Drop the references that were held for the pairing. */
	tap_dev_unref(other);
	tap_dev_unref(sdev);

	return (other_id);
}

/*
 * TAP_IOC_MODIFY handler: changes (or removes) the peer of a tap instance.
 *
 * karg points to a tap_ioc_modify_t naming the instance (sim_link_id) and
 * the desired peer (sim_peer_link_id); DATALINK_INVALID_LINKID as the peer
 * detaches the current peer without attaching a new one.
 *
 * Reference handling: each tap_dev_lookup() takes a reference.  When a new
 * peer is attached, the two lookup references are kept and become the
 * references held for the pairing (later released by tap_remove_peer()).
 * On every other path the lookup references are released here.
 *
 * Returns 0 on success, ENOENT if either instance is not found or the
 * instance belongs to a different zone than the caller, EINVAL for an
 * attempt to peer an instance with itself, or EACCES if the two instances
 * are in different zones.
 */
/* ARGSUSED */
static int
tap_ioc_modify(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
	tap_ioc_modify_t *modify_arg = karg;
	tap_dev_t *sdev;
	tap_dev_t *sdev_peer = NULL;

	rw_enter(&tap_dev_lock, RW_WRITER);
	if ((sdev = tap_dev_lookup(modify_arg->sim_link_id)) == NULL) {
		rw_exit(&tap_dev_lock);
		return (ENOENT);
	}

	/* Instances created in another zone are reported as nonexistent */
	if (sdev->sd_zoneid != crgetzoneid(cred)) {
		rw_exit(&tap_dev_lock);
		tap_dev_unref(sdev);
		return (ENOENT);
	}

	if (sdev->sd_link_id == modify_arg->sim_peer_link_id) {
		/* Cannot peer with self */
		rw_exit(&tap_dev_lock);
		tap_dev_unref(sdev);
		return (EINVAL);
	}

	if (sdev->sd_peer_dev != NULL && sdev->sd_peer_dev->sd_link_id ==
	    modify_arg->sim_peer_link_id) {
		/* Nothing to modify */
		rw_exit(&tap_dev_lock);
		tap_dev_unref(sdev);
		return (0);
	}

	if (modify_arg->sim_peer_link_id != DATALINK_INVALID_LINKID) {
		sdev_peer = tap_dev_lookup(modify_arg->sim_peer_link_id);
		if (sdev_peer == NULL) {
			/* Peer tap device not available */
			rw_exit(&tap_dev_lock);
			tap_dev_unref(sdev);
			return (ENOENT);
		}
		if (sdev_peer->sd_zoneid != sdev->sd_zoneid) {
			/* The two peers must be in the same zone (for now). */
			rw_exit(&tap_dev_lock);
			tap_dev_unref(sdev);
			tap_dev_unref(sdev_peer);
			return (EACCES);
		}
	}

	/* First remove any previous peer */
	(void) tap_remove_peer(sdev);

	if (sdev_peer != NULL) {
		/* Remove any previous peer of sdev_peer */
		(void) tap_remove_peer(sdev_peer);
		/* Update both devices with the new peer */
		sdev_peer->sd_peer_dev = sdev;
		sdev->sd_peer_dev = sdev_peer;
		/* Hold references on both devices */
	} else {
		/* Release sdev lookup reference */
		tap_dev_unref(sdev);
	}

	rw_exit(&tap_dev_lock);
	return (0);
}

/*
 * TAP_IOC_DELETE handler: destroys the tap instance named by sid_link_id.
 *
 * Sequence: look up the instance (taking a reference), destroy its devnet,
 * detach any peer, flag the instance SDF_SHUTDOWN and wait for
 * sd_threadcount to drain, disable the MAC, remove the instance from the
 * device list, and finally drop both the lookup reference and the
 * reference taken at creation, which frees the instance.
 *
 * If the wait is interrupted by a signal or the MAC cannot be disabled,
 * the "fail" path re-creates the devnet, clears SDF_SHUTDOWN and tries to
 * re-attach the previous peer.  In that case the lookup reference on sdev
 * and the one just taken on the peer become the pairing references;
 * otherwise the lookup reference on sdev is released.
 *
 * Returns 0 on success, ENOENT if the instance is not found or belongs to
 * another zone, EINTR if interrupted while waiting, or the error from
 * dls_devnet_destroy() / mac_disable().
 */
/* ARGSUSED */
static int
tap_ioc_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
	int err;
	tap_dev_t *sdev;
	tap_dev_t *sdev_peer;
	tap_ioc_delete_t *delete_arg = karg;
	datalink_id_t tmpid;
	datalink_id_t peerid;

	rw_enter(&tap_dev_lock, RW_WRITER);
	if ((sdev = tap_dev_lookup(delete_arg->sid_link_id)) == NULL) {
		rw_exit(&tap_dev_lock);
		return (ENOENT);
	}

	/* Instances created in another zone are reported as nonexistent */
	if (sdev->sd_zoneid != crgetzoneid(cred)) {
		rw_exit(&tap_dev_lock);
		tap_dev_unref(sdev);
		return (ENOENT);
	}

	if ((err = dls_devnet_destroy(sdev->sd_mh, &tmpid, B_TRUE)) != 0) {
		rw_exit(&tap_dev_lock);
		tap_dev_unref(sdev);
		return (err);
	}

	ASSERT(sdev->sd_link_id == tmpid);
	/* Remove any attached peer link */
	peerid = tap_remove_peer(sdev);

	/* Prevent new threads from using the instance */
	mutex_enter(&sdev->sd_instlock);
	sdev->sd_flags |= SDF_SHUTDOWN;
	/* Wait until all active threads using the instance exit */
	while (sdev->sd_threadcount > 0) {
		if (cv_wait_sig(&sdev->sd_threadwait,
		    &sdev->sd_instlock) == 0)  {
			/* Signaled */
			mutex_exit(&sdev->sd_instlock);
			err = EINTR;
			goto fail;
		}
	}
	mutex_exit(&sdev->sd_instlock);

	/* Try disabling the MAC */
	if ((err = mac_disable(sdev->sd_mh)) != 0)
		goto fail;

	list_remove(&tap_dev_list, sdev);
	rw_exit(&tap_dev_lock);
	tap_dev_unref(sdev); /* Release lookup ref */
	/* Releasing the last ref performs sdev/mem free */
	tap_dev_unref(sdev);
	return (err);
fail:
	/* Re-create tap instance and add any previous peer */
	(void) dls_devnet_create(sdev->sd_mh, sdev->sd_link_id,
	    crgetzoneid(cred));
	/*
	 * NOTE(review): SDF_SHUTDOWN is set under sd_instlock above but is
	 * cleared here without it -- confirm this cannot race with other
	 * sd_flags updates.
	 */
	sdev->sd_flags &= ~SDF_SHUTDOWN;

	ASSERT(sdev->sd_peer_dev == NULL);
	if (peerid != DATALINK_INVALID_LINKID &&
	    ((sdev_peer = tap_dev_lookup(peerid)) != NULL)) {
		/* Attach peer device back */
		ASSERT(sdev_peer->sd_peer_dev == NULL);
		sdev_peer->sd_peer_dev = sdev;
		sdev->sd_peer_dev = sdev_peer;
		/* Hold reference on both devices */
	} else {
		/*
		 * No previous peer or previous peer no longer
		 * available so release lookup reference.
		 */
		tap_dev_unref(sdev);
	}

	rw_exit(&tap_dev_lock);
	return (err);
}

/*
 * TAP_IOC_INFO handler: reports the MAC address, MAC address length,
 * media type and peer link id of the tap instance named by sii_link_id.
 *
 * sii_peer_link_id is always written: it is the peer's link id, or
 * DATALINK_INVALID_LINKID when the instance has no peer, so the result
 * never depends on what the caller left in that field.
 *
 * Returns 0 on success or ENOENT if the link is not visible from the
 * caller's zone or no such instance exists.
 */
/* ARGSUSED */
static int
tap_ioc_info(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
	tap_ioc_info_t *info_arg = karg;
	tap_dev_t *sdev;

	/* Make sure that the tap link is visible from the caller's zone. */
	if (!dls_devnet_islinkvisible(info_arg->sii_link_id, crgetzoneid(cred)))
		return (ENOENT);

	rw_enter(&tap_dev_lock, RW_READER);
	if ((sdev = tap_dev_lookup(info_arg->sii_link_id)) == NULL) {
		rw_exit(&tap_dev_lock);
		return (ENOENT);
	}

	(void) memcpy(info_arg->sii_mac_addr, sdev->sd_mac_addr,
	    sdev->sd_mac_len);
	info_arg->sii_mac_len = sdev->sd_mac_len;
	info_arg->sii_type = sdev->sd_type;
	/* sd_peer_dev is stable while tap_dev_lock is held. */
	if (sdev->sd_peer_dev != NULL)
		info_arg->sii_peer_link_id = sdev->sd_peer_dev->sd_link_id;
	else
		info_arg->sii_peer_link_id = DATALINK_INVALID_LINKID;
	rw_exit(&tap_dev_lock);
	/* Release the lookup reference. */
	tap_dev_unref(sdev);
	return (0);
}

/*
 * Registers the calling thread as an active user of the instance by
 * incrementing sd_threadcount under sd_instlock.  The reference is
 * refused (B_FALSE, count untouched) if the instance is flagged
 * SDF_SHUTDOWN or is not flagged SDF_STARTED.  A successful reference
 * must be released with tap_thread_unref().
 */
static boolean_t
tap_thread_ref(tap_dev_t *sdev)
{
	boolean_t granted = B_FALSE;

	mutex_enter(&sdev->sd_instlock);
	if ((sdev->sd_flags & SDF_STARTED) &&
	    !(sdev->sd_flags & SDF_SHUTDOWN)) {
		sdev->sd_threadcount++;
		granted = B_TRUE;
	}
	mutex_exit(&sdev->sd_instlock);

	return (granted);
}

/*
 * Releases a reference taken with tap_thread_ref().  When the count
 * drops to zero, waiters on sd_threadwait (see tap_ioc_delete()) are
 * woken.
 */
static void
tap_thread_unref(tap_dev_t *sdev)
{
	mutex_enter(&sdev->sd_instlock);
	sdev->sd_threadcount--;
	if (sdev->sd_threadcount == 0)
		cv_broadcast(&sdev->sd_threadwait);
	mutex_exit(&sdev->sd_instlock);
}

static void
tap_rx(void *arg)
{
	mblk_t *mp = arg;
	mac_header_info_t hdr_info;
	tap_dev_t *sdev;

	sdev = (tap_dev_t *)mp->b_next;
	mp->b_next = NULL;

	/* Check for valid packet header */
	if (mac_header_info(sdev->sd_mh, mp, &hdr_info) != 0) {
		freemsg(mp);
		sdev->sd_stats.recv_errors++;
		goto rx_done;
	}

	/*
	 * When we are NOT in promiscuous mode we only receive
	 * unicast packets addressed to us and multicast packets that
	 * MAC clients have requested.
	 */
	if (!sdev->sd_promisc &&
	    hdr_info.mhi_dsttype != MAC_ADDRTYPE_BROADCAST) {
		if (hdr_info.mhi_dsttype == MAC_ADDRTYPE_UNICAST &&
		    bcmp(hdr_info.mhi_daddr, sdev->sd_mac_addr,
		    ETHERADDRL) != 0) {
			freemsg(mp);
			goto rx_done;
		} else if (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) {
			mutex_enter(&sdev->sd_instlock);
			if (mcastaddr_lookup(sdev, hdr_info.mhi_daddr) ==
			    NULL) {
				mutex_exit(&sdev->sd_instlock);
				freemsg(mp);
				goto rx_done;
			}
			mutex_exit(&sdev->sd_instlock);
		}
	}

	sdev->sd_stats.recv_count++;
	sdev->sd_stats.rbytes += msgdsize(mp);
	mac_rx(sdev->sd_mh, NULL, mp);
rx_done:
	tap_thread_unref(sdev);
}

/*
 * MAC transmit callback: hands every packet of mp_chain to the peer tap
 * instance through the receive task queue.
 *
 * The whole chain is always consumed (delivered or freed), so the return
 * value is always NULL.  Packets are dropped when there is no peer or when
 * either instance cannot be thread-referenced (shut down or not started).
 * Each packet is padded to ETHERMIN, pulled up into a single mblk and has
 * its checksum fixed before being dispatched to tap_rx().
 */
static mblk_t *
tap_m_tx(void *arg, mblk_t *mp_chain)
{
	tap_dev_t *sdev = arg;
	tap_dev_t *sdev_rx;
	mblk_t *mpnext = mp_chain;
	mblk_t *mp;

	rw_enter(&tap_dev_lock, RW_READER);
	if ((sdev_rx = sdev->sd_peer_dev) == NULL) {
		/* Discard packets when no peer exists */
		rw_exit(&tap_dev_lock);
		freemsgchain(mp_chain);
		return (NULL);
	}

	/*
	 * Discard packets when either device is shutting down or not ready.
	 * Though MAC layer ensures a reference is held on the MAC while we
	 * process the packet chain, there is no guarantee the peer MAC will
	 * remain enabled. So we increment per-instance threadcount to ensure
	 * either MAC instance is not disabled while we handle the chain of
	 * packets. It is okay if the peer device is disconnected while we are
	 * here since we lookup the peer device while holding tap_dev_lock
	 * (reader lock) and increment the threadcount of the peer, the peer
	 * MAC cannot be disabled in tap_ioc_delete.
	 */
	if (!tap_thread_ref(sdev_rx)) {
		rw_exit(&tap_dev_lock);
		freemsgchain(mp_chain);
		return (NULL);
	}
	rw_exit(&tap_dev_lock);

	if (!tap_thread_ref(sdev)) {
		tap_thread_unref(sdev_rx);
		freemsgchain(mp_chain);
		return (NULL);
	}

	while ((mp = mpnext) != NULL) {
		int len;
		int size;
		mblk_t *mp_new;
		mblk_t *mp_tmp;

		/* Unlink this packet from the chain before processing it */
		mpnext = mp->b_next;
		mp->b_next = NULL;
		len = msgdsize(mp);

		/* Pad packet to minimum Ethernet frame size */
		if (len < ETHERMIN) {
			size = ETHERMIN - len;
			mp_new = allocb(size, BPRI_HI);
			if (mp_new == NULL) {
				sdev->sd_stats.xmit_errors++;
				freemsg(mp);
				continue;
			}
			bzero(mp_new->b_wptr, size);
			mp_new->b_wptr += size;

			/* Append the zero padding after the last mblk */
			mp_tmp = mp;
			while (mp_tmp->b_cont != NULL)
				mp_tmp = mp_tmp->b_cont;
			mp_tmp->b_cont = mp_new;
			len += size;
		}

		/* Pullup packet into a single mblk */
		if (!pullupmsg(mp, -1)) {
			sdev->sd_stats.xmit_errors++;
			freemsg(mp);
			continue;
		}

		/* Fix mblk checksum as the pkt dest is local */
		if ((mp = mac_fix_cksum(mp)) == NULL) {
			sdev->sd_stats.xmit_errors++;
			continue;
		}

		/* Hold reference for taskq receive processing per-pkt */
		if (!tap_thread_ref(sdev_rx)) {
			/* Peer became unusable: drop this and all remaining */
			freemsg(mp);
			freemsgchain(mpnext);
			break;
		}

		/* Use taskq for pkt receive to avoid kernel stack explosion */
		/* b_next carries the receiving instance pointer to tap_rx() */
		mp->b_next = (mblk_t *)sdev_rx;
		if (ddi_taskq_dispatch(tap_rxq, tap_rx, mp,
		    DDI_NOSLEEP) == DDI_SUCCESS) {
			sdev->sd_stats.xmit_count++;
			sdev->sd_stats.obytes += len;
		} else {
			/* Dispatch failed: undo the per-packet reference */
			tap_thread_unref(sdev_rx);
			mp->b_next = NULL;
			freemsg(mp);
			sdev_rx->sd_stats.recv_errors++;
		}
	}

	/* Release the chain-wide references taken before the loop */
	tap_thread_unref(sdev);
	tap_thread_unref(sdev_rx);
	return (NULL);
}

/*
 * MAC statistics callback.  Stores the requested statistic in *val and
 * returns 0, or returns ENOTSUP (leaving *val untouched) for statistics
 * this driver does not provide.  Packet, byte and error counters come
 * from the instance's sd_stats.
 */
static int
tap_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	tap_dev_t *sdev = arg;

	ASSERT(sdev->sd_mh != NULL);

	switch (stat) {
	case MAC_STAT_IFSPEED:
		*val = 100 * 1000000ull; /* 100 Mbps */
		return (0);
	case MAC_STAT_LINK_STATE:
		*val = LINK_DUPLEX_FULL;
		return (0);
	case MAC_STAT_LINK_UP:
		*val = (sdev->sd_flags & SDF_STARTED) ?
		    LINK_STATE_UP : LINK_STATE_DOWN;
		return (0);
	case MAC_STAT_OPACKETS:
		*val = sdev->sd_stats.xmit_count;
		return (0);
	case MAC_STAT_OBYTES:
		*val = sdev->sd_stats.obytes;
		return (0);
	case MAC_STAT_IERRORS:
		*val = sdev->sd_stats.recv_errors;
		return (0);
	case MAC_STAT_OERRORS:
		*val = sdev->sd_stats.xmit_errors;
		return (0);
	case MAC_STAT_RBYTES:
		*val = sdev->sd_stats.rbytes;
		return (0);
	case MAC_STAT_IPACKETS:
		*val = sdev->sd_stats.recv_count;
		return (0);
	case MAC_STAT_PROMISC:
	case MAC_STAT_MULTIRCV:
	case MAC_STAT_MULTIXMT:
	case MAC_STAT_BRDCSTRCV:
	case MAC_STAT_BRDCSTXMT:
	default:
		/* Not tracked by this driver. */
		return (ENOTSUP);
	}
}

/*
 * MAC start callback: flags the instance SDF_STARTED, which
 * tap_thread_ref() requires before the instance can be used.
 * Always returns 0.
 */
static int
tap_m_start(void *arg)
{
	tap_dev_t *inst = arg;

	inst->sd_flags = inst->sd_flags | SDF_STARTED;

	return (0);
}

/* MAC stop callback: clears SDF_STARTED on the instance. */
static void
tap_m_stop(void *arg)
{
	tap_dev_t *inst = arg;

	inst->sd_flags = inst->sd_flags & ~SDF_STARTED;
}

/*
 * MAC promiscuous-mode callback: records the requested mode, which
 * tap_rx() consults when filtering received packets.  Always returns 0.
 */
static int
tap_m_promisc(void *arg, boolean_t on)
{
	((tap_dev_t *)arg)->sd_promisc = on;

	return (0);
}

/*
 * Searches the instance's multicast address array (sd_mcastaddr_count
 * entries of ETHERADDRL bytes each) for addrp.  Returns a pointer to the
 * matching entry inside the array, or NULL if the address is not
 * enabled.
 *
 * The caller must hold the instance's sd_instlock.
 */
static uint8_t *
mcastaddr_lookup(tap_dev_t *sdev, const uint8_t *addrp)
{
	int i;

	ASSERT(MUTEX_HELD(&sdev->sd_instlock));

	for (i = 0; i < sdev->sd_mcastaddr_count; i++) {
		uint8_t *entry = sdev->sd_mcastaddrs + (i * ETHERADDRL);

		if (bcmp(entry, addrp, ETHERADDRL) == 0)
			return (entry);
	}

	return (NULL);
}

/*
 * MAC multicast callback: adds (add == B_TRUE) or removes a multicast
 * address on the tap instance.
 *
 * The address array is replaced by a freshly allocated array of the new
 * size.  Because the allocation may sleep it is done before taking
 * sd_instlock; if the address count changed in the meantime the buffer is
 * discarded and the operation retried.
 *
 * A removal request while no addresses are configured is rejected up
 * front: the new size would otherwise wrap around to a huge value and be
 * passed to a sleeping allocation.
 *
 * Returns 0 on success, or EINVAL (with a warning) when asked to remove
 * an address that is not present or to add one that already is.
 */
static int
tap_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	tap_dev_t *sdev = arg;
	uint8_t *maddrptr;
	uint8_t *newbuf;
	size_t prevsize;
	size_t newsize;
	ptrdiff_t len;
	ptrdiff_t len2;

alloc_retry:
	prevsize = sdev->sd_mcastaddr_count * ETHERADDRL;
	if (!add && prevsize == 0) {
		/* Nothing to remove; avoid underflowing newsize below */
		cmn_err(CE_WARN, "tap: MAC call to remove a "
		    "Multicast address failed");
		return (EINVAL);
	}
	newsize = add ? (prevsize + ETHERADDRL) : (prevsize - ETHERADDRL);
	newbuf = kmem_alloc(newsize, KM_SLEEP);

	mutex_enter(&sdev->sd_instlock);
	if (prevsize != (sdev->sd_mcastaddr_count * ETHERADDRL)) {
		/* Address count changed while we were allocating; retry */
		mutex_exit(&sdev->sd_instlock);
		kmem_free(newbuf, newsize);
		goto alloc_retry;
	}

	maddrptr = mcastaddr_lookup(sdev, addrp);
	if (!add && maddrptr != NULL) {
		/* Removing a Multicast address */
		if (newbuf != NULL) {
			/* Copy the entries before and after the removed one */
			/* LINTED: E_PTRDIFF_OVERFLOW */
			len = maddrptr - sdev->sd_mcastaddrs;
			(void) memcpy(newbuf, sdev->sd_mcastaddrs, len);
			len2 = prevsize - len - ETHERADDRL;
			(void) memcpy(newbuf + len,
			    maddrptr + ETHERADDRL, len2);
		}
		sdev->sd_mcastaddr_count--;
	} else if (add && maddrptr == NULL) {
		/* Adding a new Multicast address */
		(void) memcpy(newbuf, sdev->sd_mcastaddrs, prevsize);
		(void) memcpy(newbuf + prevsize, addrp, ETHERADDRL);
		sdev->sd_mcastaddr_count++;
	} else {
		/*
		 * Error: removing a non-existing Multicast address, or
		 * adding one that is already present.
		 */
		mutex_exit(&sdev->sd_instlock);
		kmem_free(newbuf, newsize);
		cmn_err(CE_WARN, "tap: MAC call to remove a "
		    "Multicast address failed");
		return (EINVAL);
	}

	/* Swap in the new array and release the old one */
	kmem_free(sdev->sd_mcastaddrs, prevsize);
	sdev->sd_mcastaddrs = newbuf;
	mutex_exit(&sdev->sd_instlock);
	return (0);
}

/*
 * MAC unicast-address callback: replaces the instance's MAC address with
 * the ETHERADDRL bytes at macaddr.  Always returns 0.
 */
static int
tap_m_unicst(void *arg, const uint8_t *macaddr)
{
	tap_dev_t *inst = arg;

	(void) memcpy(inst->sd_mac_addr, macaddr, ETHERADDRL);

	return (0);
}

/*
 * Private-property setter.  No private properties are implemented, so
 * every request is rejected with EINVAL.
 */
static int
tap_set_priv_prop(tap_dev_t *sdev, const char *pr_name,
    uint_t pr_valsize, const void *pr_val)
{
	int rc = EINVAL;

	return (rc);
}

/*
 * MAC set-property callback.
 *
 * Only MAC_PROP_MTU is handled: the new MTU is read from wldp_buf and,
 * if it lies strictly between ETHERMIN and TAP_MAX_MTU, applied with
 * mac_maxsdu_update().
 *
 * Returns the mac_maxsdu_update() result for an accepted MTU, EINVAL for
 * an out-of-range MTU or a value buffer smaller than the MTU, and ENOTSUP
 * for every other property.
 */
/* ARGSUSED */
static int
tap_m_setprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num,
    uint_t wldp_length, const void *wldp_buf)
{
	tap_dev_t *sdev = arg;
	uint32_t mtu;

	switch (wldp_pr_num) {
	case MAC_PROP_MTU:
		/* Do not read past the end of the supplied value */
		if (wldp_length < sizeof (mtu))
			return (EINVAL);
		(void) memcpy(&mtu, wldp_buf, sizeof (mtu));
		if (mtu > ETHERMIN && mtu < TAP_MAX_MTU)
			return (mac_maxsdu_update(sdev->sd_mh, mtu));
		return (EINVAL);
	default:
		break;
	}

	/* No other property is settable on a tap instance */
	return (ENOTSUP);
}

/*
 * Private-property getter.  No private properties are implemented, so
 * every request fails with ENOTSUP.
 */
static int
tap_get_priv_prop(tap_dev_t *sdev, const char *pr_name,
    uint_t pr_valsize, void *pr_val)
{
	return (ENOTSUP);
}

/*
 * MAC get-property callback.  No property can be read from a tap
 * instance, so ENOTSUP is returned for every request, whatever the
 * instance type.
 */
/* ARGSUSED */
static int
tap_m_getprop(void *arg, const char *pr_name, mac_prop_id_t wldp_pr_num,
    uint_t wldp_length, void *wldp_buf)
{
	return (ENOTSUP);
}
