#!/usr/bin/python

import argparse
import errno
import logging
import os
import os.path
import platform
import re
import subprocess
import stat
import sys
import tempfile

# Values accepted by --mark-init.  'auto' is resolved to a concrete init
# system at activation time; the other names double as marker file names
# inside the OSD data directory.
init_systems = ['upstart', 'sysvinit', 'systemd', 'auto']

# Name the logger after the script when run directly, after the module
# when imported.
log_name = os.path.basename(sys.argv[0]) if __name__ == '__main__' else __name__
log = logging.getLogger(log_name)


class ActivateError(Exception):
    """
    OSD activation error
    """

    def __str__(self):
        # The message starts with the docstring of the concrete class, so
        # every subclass gets its own prefix just by documenting itself;
        # the constructor arguments follow, separated by ': '.
        parts = [self.__doc__.strip()]
        parts.extend(str(arg) for arg in self.args)
        return ': '.join(parts)


# Raised by check_osd_magic() when the 'magic' file is missing or does not
# hold the expected value.  The docstring is the user-visible message prefix
# (see ActivateError.__str__).
class BadMagicError(ActivateError):
    """
    Does not look like a Ceph OSD, or incompatible version
    """


# Raised by must_be_one_line() when the text does not end in a newline.
# The docstring is the user-visible message prefix (see
# ActivateError.__str__).
class TruncatedLineError(ActivateError):
    """
    Line is truncated
    """


# Raised by must_be_one_line() when the text holds more than one line.
# The docstring is the user-visible message prefix (see
# ActivateError.__str__).
class TooManyLinesError(ActivateError):
    """
    Too many lines
    """


# Raised by mount_activate() when detect_fstype() fails for the device.
# The docstring is the user-visible message prefix (see
# ActivateError.__str__).
class FilesystemTypeError(ActivateError):
    """
    Cannot discover filesystem type
    """


# Raised by mount() when the /bin/mount command exits with an error.
# The docstring is the user-visible message prefix (see
# ActivateError.__str__).
class MountError(ActivateError):
    """
    Mounting filesystem failed
    """


# Raised by unmount() when the /bin/umount command exits with an error.
# The docstring is the user-visible message prefix (see
# ActivateError.__str__).
class UnmountError(ActivateError):
    """
    Unmounting filesystem failed
    """


def maybe_mkdir(*a, **kw):
    """
    Create a directory, tolerating one that already exists.

    Takes the same arguments as os.mkdir(); the first positional argument
    is the path.  A symlink found at the path (dangling or not) is removed
    first, so the directory is created at the path itself.

    :raises: OSError if os.mkdir() fails for any reason other than the
    path already existing.
    """
    path = a[0]
    # islink() is based on lstat(), so unlike exists() it also sees
    # dangling symlinks; those used to be left in place and the resulting
    # EEXIST from mkdir was then silently ignored.
    if os.path.islink(path):
        log.debug('Removing old symlink at %s', path)
        os.unlink(path)
    try:
        os.mkdir(*a, **kw)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def must_be_one_line(line):
    """
    Check that the text is exactly one newline-terminated line.

    :return: The line with its trailing newline removed.
    :raises: TruncatedLineError if the text does not end in a newline,
    TooManyLinesError if another newline precedes the final one.
    """
    terminator = line[-1:]
    if terminator != '\n':
        raise TruncatedLineError(line)
    stripped = line[:-1]
    if '\n' in stripped:
        raise TooManyLinesError(stripped)
    return stripped


def read_one_line(parent, name):
    """
    Read a file whose sole contents are a single line.

    Strips the newline.

    :return: Contents of the line, or None if file did not exist.
    :raises: ActivateError if the file does not hold exactly one
    newline-terminated line; IOError for read failures other than the
    file being absent.
    """
    path = os.path.join(parent, name)
    try:
        # the context manager closes the handle promptly, even when
        # read() fails
        with open(path, 'rb') as f:
            line = f.read()
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        raise

    try:
        return must_be_one_line(line)
    except (TruncatedLineError, TooManyLinesError) as e:
        raise ActivateError('File is corrupt: {path}: {msg}'.format(
                path=path,
                msg=e,
                ))


def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    Adds a newline.

    The line is written to a temporary file next to the target, which is
    then renamed over the target, so the target is only ever replaced by a
    completely written file.
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    data = text + '\n'
    if not isinstance(data, bytes):
        # the file is opened in binary mode; encode text that is not
        # already a byte string
        data = data.encode('utf-8')
    with open(tmp, 'wb') as f:
        f.write(data)
        # fsync() only covers what has reached the OS: push Python's own
        # buffer out first, otherwise the data is written at close() time,
        # after the sync.
        f.flush()
        os.fsync(f.fileno())
    os.rename(tmp, path)


# Expected contents of the 'magic' file in an OSD data directory; compared
# against by check_osd_magic().
CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'


def check_osd_magic(path):
    """
    Verify that the directory carries the Ceph OSD magic.

    :raises: BadMagicError if the 'magic' file is absent or its content
    differs from CEPH_OSD_ONDISK_MAGIC.
    """
    found = read_one_line(path, 'magic')
    # a missing file (probably not mkfs'ed yet) and a mismatching value
    # are rejected alike
    looks_like_osd = found is not None and found == CEPH_OSD_ONDISK_MAGIC
    if not looks_like_osd:
        raise BadMagicError(path)


def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :raises: ActivateError if the id is anything other than one or more
    ASCII digits.
    """
    # \Z anchors at the very end of the string; '$' would also match just
    # before a trailing newline and let '12\n' through.
    if not re.match(r'[0-9]+\Z', osd_id):
        raise ActivateError('osd id is not numeric')


def get_osd_id(path):
    """
    Read the OSD id recorded in the 'whoami' file of a data directory.

    :return: The id as a string, or None if the file does not exist.
    :raises: ActivateError if the recorded id is not numeric.
    """
    whoami = read_one_line(path, 'whoami')
    if whoami is None:
        return None
    check_osd_id(whoami)
    return whoami


# TODO depend on python2.7
def _check_output(*args, **kwargs):
    process = subprocess.Popen(
        stdout=subprocess.PIPE,
        *args, **kwargs)
    out, _ = process.communicate()
    ret = process.wait()
    if ret:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = args[0]
        raise subprocess.CalledProcessError(ret, cmd, output=out)
    return out


def allocate_osd_id(
    cluster,
    fsid,
    keyring,
    ):
    """
    Ask the cluster for an OSD id by running 'ceph osd create' for fsid.

    The command runs as client.bootstrap-osd with the given keyring.

    :return: The id printed by the command, as a string.
    :raises: ActivateError if the command fails or its output is not a
    single numeric line.
    """
    log.debug('Allocating OSD id...')
    create_cmd = [
        '/usr/bin/ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'osd', 'create', '--concise',
        fsid,
        ]
    try:
        raw_id = _check_output(args=create_cmd)
    except subprocess.CalledProcessError as e:
        raise ActivateError('ceph osd create failed', e)
    new_id = must_be_one_line(raw_id)
    check_osd_id(new_id)
    return new_id


def mkfs(
    path,
    cluster,
    osd_id,
    fsid,
    keyring,
    ):
    """
    Initialize the OSD data directory at path.

    Fetches the monitor map into <path>/activate.monmap (as
    client.bootstrap-osd, using keyring), then runs 'ceph-osd --mkfs
    --mkkey' with the journal and keyring placed inside path.

    :raises: subprocess.CalledProcessError if either command fails.
    """
    monmap = os.path.join(path, 'activate.monmap')
    fetch_monmap_cmd = [
        '/usr/bin/ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'mon', 'getmap', '-o', monmap,
        ]
    subprocess.check_call(args=fetch_monmap_cmd)

    journal = os.path.join(path, 'journal')
    osd_keyring = os.path.join(path, 'keyring')
    mkfs_cmd = [
        '/usr/bin/ceph-osd',
        '--cluster', cluster,
        '--mkfs',
        '--mkkey',
        '-i', osd_id,
        '--monmap', monmap,
        '--osd-data', path,
        '--osd-journal', journal,
        '--osd-uuid', fsid,
        '--keyring', osd_keyring,
        ]
    subprocess.check_call(args=mkfs_cmd)
    # TODO ceph-osd --mkfs removes the monmap file?
    # os.unlink(monmap)


def auth_key(
    path,
    cluster,
    osd_id,
    keyring,
    ):
    """
    Register the OSD's key (<path>/keyring) with the cluster.

    Runs 'ceph auth add osd.<osd_id>' as client.bootstrap-osd, granting
    'allow *' on osd and 'allow rwx' on mon.

    :raises: subprocess.CalledProcessError if the command fails.
    """
    entity = 'osd.{osd_id}'.format(osd_id=osd_id)
    osd_keyring = os.path.join(path, 'keyring')
    auth_cmd = [
        '/usr/bin/ceph',
        '--cluster', cluster,
        '--name', 'client.bootstrap-osd',
        '--keyring', keyring,
        'auth', 'add', entity,
        '-i', osd_keyring,
        'osd', 'allow *',
        'mon', 'allow rwx',
        ]
    subprocess.check_call(args=auth_cmd)


def move_mount(
    path,
    cluster,
    osd_id,
    ):
    """
    Move the filesystem mounted at path to /var/lib/ceph/osd/<cluster>-<id>.

    The target directory is created if needed.

    :raises: subprocess.CalledProcessError if 'mount --move' fails.
    """
    log.debug('Moving mount to final location...')
    dirname = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    osd_data = os.path.join('/var/lib/ceph/osd', dirname)
    maybe_mkdir(osd_data)
    move_cmd = ['/bin/mount', '--move', '--', path, osd_data]
    subprocess.check_call(args=move_cmd)


def start_daemon(
    cluster,
    osd_id,
    ):
    """
    Start the OSD daemon through the init system its data dir is tagged
    with.

    The tag is a marker file ('upstart' or 'sysvinit') inside
    /var/lib/ceph/osd/<cluster>-<osd_id>.

    :raises: ActivateError if the directory carries neither marker, or if
    the start command fails.
    """
    log.debug('Starting %s osd.%s...', cluster, osd_id)

    path = '/var/lib/ceph/osd/{cluster}-{osd_id}'.format(
        cluster=cluster, osd_id=osd_id)

    if os.path.exists(os.path.join(path,'upstart')):
        command = [
            '/sbin/initctl',
            # use emit, not start, because start would fail if the
            # instance was already running
            'emit',
            # since the daemon starting doesn't guarantee much about
            # the service being operational anyway, don't bother
            # waiting for it
            '--no-wait',
            '--',
            'ceph-osd',
            'cluster={cluster}'.format(cluster=cluster),
            'id={osd_id}'.format(osd_id=osd_id),
            ]
    elif os.path.exists(os.path.join(path, 'sysvinit')):
        command = [
            '/usr/sbin/service',
            'ceph',
            'start',
            'osd.{osd_id}'.format(osd_id=osd_id),
            ]
    else:
        raise ActivateError('{cluster} osd.{osd_id} is not tagged with an init system'.format(
                cluster=cluster,
                osd_id=osd_id,
                ))

    try:
        subprocess.check_call(args=command)
    except subprocess.CalledProcessError as e:
        raise ActivateError('ceph osd start failed', e)

def detect_fstype(
    dev,
    ):
    """
    Ask blkid for the filesystem type on a device.

    :return: The TYPE value printed by blkid, without the newline.
    :raises: subprocess.CalledProcessError if blkid fails;
    TruncatedLineError or TooManyLinesError if its output is not exactly
    one line.
    """
    fstype = _check_output(
        args=[
            '/sbin/blkid',
            # we don't want stale cached results
            '-p',
            '-s', 'TYPE',
            # option and value as separate arguments; a missing comma here
            # used to glue them together into '-ovalue'
            '-o', 'value',
            '--',
            dev,
            ],
        )
    fstype = must_be_one_line(fstype)
    return fstype


def get_conf(cluster, variable):
    """
    Look up a configuration variable with ceph-conf, using the name
    'osd.'.

    :return: The first line of ceph-conf's output, or None if the
    variable is not set or is set to an empty value.
    :raises: ActivateError if ceph-conf cannot be executed or exits with
    a status other than 0 or 1.
    """
    lookup_cmd = [
        '/usr/bin/ceph-conf',
        '--cluster={cluster}'.format(
            cluster=cluster,
            ),
        '--name=osd.',
        '--lookup',
        variable,
        ]
    try:
        child = subprocess.Popen(
            args=lookup_cmd,
            stdout=subprocess.PIPE,
            close_fds=True,
            )
    except OSError as e:
        raise ActivateError('error executing ceph-conf', e)
    stdout_data = child.communicate()[0]
    status = child.wait()
    if status == 1:
        # config entry not found
        return None
    if status != 0:
        raise ActivateError('getting variable from configuration failed')
    first_line = stdout_data.split('\n', 1)[0]
    # don't differentiate between "var=" and no var set
    return first_line or None


# Default mount options per filesystem type, used by mount() when the
# caller supplies none.
MOUNT_OPTIONS = {
    'btrfs': 'noatime,user_subvol_rm_allowed',
    # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
    # delay a moment before removing it fully because we did have some
    # issues with ext4 before the xattrs-in-leveldb work, and it seemed
    # that user_xattr helped
    'ext4': 'noatime,user_xattr',
    'xfs': 'noatime',
    }


def mount(
    dev,
    fstype,
    options,
    ):
    """
    Mount a device on a fresh temporary directory under /var/lib/ceph/tmp.

    When options is None, the MOUNT_OPTIONS entry for fstype is used
    (empty string for unknown types).

    :return: Path of the mount point.
    :raises: MountError if the mount command fails; the temporary
    directory is removed again in that case (best effort).
    """
    # pick best-of-breed mount options based on fs type
    effective_options = (
        MOUNT_OPTIONS.get(fstype, '') if options is None else options
        )

    mount_point = tempfile.mkdtemp(prefix='mnt.', dir='/var/lib/ceph/tmp')
    mount_cmd = [
        '/bin/mount',
        '-o', effective_options,
        '--',
        dev,
        mount_point,
        ]
    try:
        subprocess.check_call(args=mount_cmd)
    except subprocess.CalledProcessError as e:
        # don't leave the unused mount point behind
        try:
            os.rmdir(mount_point)
        except (OSError, IOError):
            pass
        raise MountError(e)

    return mount_point


def unmount(
    path,
    ):
    """
    Unmount the filesystem mounted at path.

    :raises: UnmountError if the umount command fails.
    """
    umount_cmd = ['/bin/umount', '--', path]
    try:
        subprocess.check_call(args=umount_cmd)
    except subprocess.CalledProcessError as e:
        raise UnmountError(e)

def _unmount_best_effort(path):
    """Unmount path during error cleanup, logging instead of raising."""
    try:
        unmount(path)
    except UnmountError as e:
        log.error('Unable to unmount %s: %s', path, e)


def _rmdir_best_effort(path):
    """Remove the temporary mount point, logging instead of raising."""
    try:
        os.rmdir(path)
    except OSError as e:
        log.warning('Unable to remove temporary directory %s: %s', path, e)


def mount_activate(
    dev,
    activate_key_template,
    init,
    ):
    """
    Mount the OSD filesystem on a block device and activate it.

    The device is mounted on a temporary directory and activated there.
    If the same filesystem is already mounted at
    /var/lib/ceph/osd/<cluster>-<id> the temporary mount is dropped again,
    otherwise it is moved to that location.

    :return: Tuple (cluster, osd_id).
    :raises: FilesystemTypeError if the filesystem type cannot be
    detected; otherwise whatever mounting or activation raises.  Cleanup
    problems are logged and never replace the original error.
    """

    try:
        fstype = detect_fstype(dev=dev)
    except (subprocess.CalledProcessError,
            TruncatedLineError,
            TooManyLinesError) as e:
        raise FilesystemTypeError(
            'device {dev}'.format(dev=dev),
            e,
            )

    # TODO always using mount options from cluster=ceph for
    # now; see http://tracker.newdream.net/issues/3253
    mount_options = get_conf(
        cluster='ceph',
        variable='osd_mount_options_{fstype}'.format(
            fstype=fstype,
            ),
        )

    if mount_options is None:
        mount_options = get_conf(
            cluster='ceph',
            variable='osd_fs_mount_options_{fstype}'.format(
                fstype=fstype,
                ),
            )

    # remove whitespaces from mount_options
    if mount_options is not None:
        mount_options = "".join(mount_options.split())

    path = mount(dev=dev, fstype=fstype, options=mount_options)

    try:
        try:
            (osd_id, cluster) = activate(path, activate_key_template, init)

            # check if the disk is already active: same device number at
            # the temporary mount and at the final location
            src_dev = os.stat(path).st_dev
            try:
                dst_dev = os.stat('/var/lib/ceph/osd/{cluster}-{osd_id}'.format(
                        cluster=cluster,
                        osd_id=osd_id)).st_dev
            except OSError:
                # final location cannot be examined (e.g. does not exist
                # yet), so it cannot be the active mount
                active = False
            else:
                active = (src_dev == dst_dev)

            if active:
                log.info('%s osd.%s already mounted in position; unmounting ours.',
                         cluster, osd_id)
                unmount(path)
            else:
                move_mount(
                    path=path,
                    cluster=cluster,
                    osd_id=osd_id,
                    )
            return (cluster, osd_id)
        except:
            # deliberately catches everything: the mount must not be left
            # behind whatever went wrong, and the error is re-raised.
            log.error('Failed to activate')
            # cleanup failures are handled inside the helper so that the
            # bare raise below still re-raises the activation failure
            _unmount_best_effort(path)
            raise
    finally:
        # remove our temp dir; a failure here (e.g. still mounted) must
        # not hide the result or the original exception
        _rmdir_best_effort(path)


def activate_dir(
    path,
    activate_key_template,
    init,
    ):
    """
    Activate an OSD that lives in a plain directory.

    After activation, /var/lib/ceph/osd/<cluster>-<id> is made a symlink
    to path unless path already is that location.  A symlink pointing
    elsewhere is replaced.

    :return: Tuple (cluster, osd_id).
    :raises: ActivateError if path does not exist, if the canonical
    location is occupied by something other than a symlink or path
    itself, or if the symlink cannot be updated.
    """

    if not os.path.exists(path):
        raise ActivateError(
            'directory %s does not exist' % path
            )

    (osd_id, cluster) = activate(path, activate_key_template, init)
    canonical = '/var/lib/ceph/osd/{cluster}-{osd_id}'.format(
        cluster=cluster,
        osd_id=osd_id)
    if path != canonical:
        # symlink it from the proper location
        create = True
        if os.path.lexists(canonical):
            if not os.path.islink(canonical):
                # readlink() would fail on a non-symlink; the only
                # acceptable case is path being another spelling of the
                # canonical location (e.g. with a trailing slash)
                if not os.path.samefile(path, canonical):
                    raise ActivateError(
                        '%s exists and is not a symlink' % canonical
                        )
                create = False
            else:
                old = os.readlink(canonical)
                if old != path:
                    log.debug('Removing old symlink %s -> %s', canonical, old)
                    try:
                        os.unlink(canonical)
                    except OSError as e:
                        raise ActivateError(
                            'unable to remove old symlink %s' % canonical,
                            e,
                            )
                else:
                    create = False
        if create:
            log.debug('Creating symlink %s -> %s', canonical, path)
            try:
                os.symlink(path, canonical)
            except OSError as e:
                raise ActivateError(
                    'unable to create symlink %s -> %s' % (canonical, path),
                    e,
                    )

    return (cluster, osd_id)


def _mark_init_system(path, cluster, init):
    """Tag the data dir at path with the init system that manages it."""
    if init == 'auto':
        configured = get_conf(
            cluster=cluster,
            variable='init'
            )
        if configured is not None:
            init = configured
        else:
            # NOTE(review): platform.dist() is deprecated in newer Python
            # versions; revisit when porting.
            distro = platform.dist()[0]
            if distro == 'Ubuntu':
                init = 'upstart'
            else:
                init = 'sysvinit'

    log.debug('Marking with init system %s', init)
    # the marker is an empty file named after the init system
    with open(os.path.join(path, init), 'w'):
        pass

    # remove markers for others, just in case.
    for other in init_systems:
        if other == init:
            continue
        try:
            os.unlink(os.path.join(path, other))
        except OSError as e:
            # a marker that is not there is the normal case; anything
            # else is worth a note but must not stop the activation
            if e.errno != errno.ENOENT:
                log.warning('Unable to remove init marker %s: %s', other, e)


def activate(
    path,
    activate_key_template,
    init,
    ):
    """
    Prepare the OSD data directory at path for use.

    Checks the magic, reads the cluster and OSD uuids, allocates an OSD
    id if 'whoami' is missing, runs mkfs unless the 'ready' file exists,
    tags the directory with an init system (skipped when init is None)
    and registers the OSD key unless the 'active' file exists.

    :param activate_key_template: bootstrap-osd keyring path; '{cluster}'
    is replaced by the cluster name.
    :param init: init system name, 'auto' to pick one from the 'init'
    configuration variable or the distribution, or None to leave the
    directory untagged.
    :return: Tuple (osd_id, cluster).
    :raises: ActivateError on missing or invalid metadata;
    subprocess.CalledProcessError if mkfs or key registration fails.
    """

    check_osd_magic(path)

    ceph_fsid = read_one_line(path, 'ceph_fsid')
    if ceph_fsid is None:
        raise ActivateError('No cluster uuid assigned.')
    log.debug('Cluster uuid is %s', ceph_fsid)

    # TODO use ceph_fsid to find the right cluster
    cluster = 'ceph'
    log.debug('Cluster name is %s', cluster)

    fsid = read_one_line(path, 'fsid')
    if fsid is None:
        raise ActivateError('No OSD uuid assigned.')
    log.debug('OSD uuid is %s', fsid)

    keyring = activate_key_template.format(cluster=cluster)

    osd_id = get_osd_id(path)
    if osd_id is None:
        osd_id = allocate_osd_id(
            cluster=cluster,
            fsid=fsid,
            keyring=keyring,
            )
        # remember the id so that a re-run reuses it
        write_one_line(path, 'whoami', osd_id)
    log.debug('OSD id is %s', osd_id)

    if not os.path.exists(os.path.join(path, 'ready')):
        log.debug('Initializing OSD...')
        # re-running mkfs is safe, so just run until it completes
        mkfs(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            fsid=fsid,
            keyring=keyring,
            )

    if init is not None:
        _mark_init_system(path, cluster, init)

    if not os.path.exists(os.path.join(path, 'active')):
        log.debug('Authorizing OSD key...')
        auth_key(
            path=path,
            cluster=cluster,
            osd_id=osd_id,
            keyring=keyring,
            )
        write_one_line(path, 'active', 'ok')
    log.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
    return (osd_id, cluster)

def parse_args():
    """
    Parse the command line (sys.argv).

    :return: argparse namespace with verbose, mount,
    activate_key_template, path, mark_init and prog (the program name,
    kept for error messages).
    """
    parser = argparse.ArgumentParser(
        description='Activate a Ceph OSD',
        )
    # (flags, settings) pairs, registered in this order
    arguments = (
        (('-v', '--verbose'), dict(
            action='store_true', default=None,
            help='be more verbose',
            )),
        (('--mount',), dict(
            action='store_true', default=None,
            help='mount a block device; path must follow',
            )),
        (('--activate-key',), dict(
            metavar='PATH',
            help='bootstrap-osd keyring path template (%(default)s)',
            dest='activate_key_template',
            )),
        (('path',), dict(
            metavar='PATH',
            nargs='?',
            help='path to block device or directory',
            )),
        (('--mark-init',), dict(
            metavar='INITSYSTEM',
            help='init system to manage this dir',
            default='auto',
            choices=init_systems,
            )),
        )
    for flags, settings in arguments:
        parser.add_argument(*flags, **settings)

    parser.set_defaults(
        activate_key_template='/var/lib/ceph/bootstrap-osd/{cluster}.keyring',
        # we want to hold on to this, for later
        prog=parser.prog,
        )
    return parser.parse_args()


def main():
    """
    Command line entry point: activate the OSD at the given path and
    start its daemon.

    A block device is mounted and activated, a directory is activated in
    place.  Any ActivateError is reported on stderr as '<prog>: <message>'
    and the process exits with status 1.
    """
    args = parse_args()

    loglevel = logging.INFO
    if args.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        )

    try:
        # PATH is optional for the parser (nargs='?'), so it may be absent
        if args.path is None:
            raise ActivateError(
                'no path given; need a block device or directory'
                )

        if not os.path.exists(args.path):
            raise ActivateError('%s does not exist' % args.path)

        mode = os.stat(args.path).st_mode
        if stat.S_ISBLK(mode):
            (cluster, osd_id) = mount_activate(
                dev=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                )
        elif stat.S_ISDIR(mode):
            (cluster, osd_id) = activate_dir(
                path=args.path,
                activate_key_template=args.activate_key_template,
                init=args.mark_init,
                )
        else:
            raise ActivateError(
                '%s is not a directory or block device' % args.path
                )

        start_daemon(
            cluster=cluster,
            osd_id=osd_id,
            )

    except ActivateError as e:
        sys.stderr.write('{prog}: {msg}\n'.format(
            prog=args.prog,
            msg=e,
            ))
        sys.exit(1)

# Run the activation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
