#!/bin/bash

# -------------------------------------------------------------------------- #
# Copyright 2002-2021, OpenNebula Project, OpenNebula Systems                #
#                                                                            #
# Licensed under the Apache License, Version 2.0 (the "License"); you may    #
# not use this file except in compliance with the License. You may obtain    #
# a copy of the License at                                                   #
#                                                                            #
# http://www.apache.org/licenses/LICENSE-2.0                                 #
#                                                                            #
# Unless required by applicable law or agreed to in writing, software        #
# distributed under the License is distributed on an "AS IS" BASIS,          #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
# See the License for the specific language governing permissions and        #
# limitations under the License.                                             #
#--------------------------------------------------------------------------- #

# Resolve the directory this driver script lives in and load the KVM driver
# configuration and the shared helper functions relative to it.
# "$0" is quoted so that an installation path containing whitespace still
# resolves to a single directory.
DRIVER_PATH=$(dirname "$0")
source "$DRIVER_PATH/../../etc/vmm/kvm/kvmrc"
source "$DRIVER_PATH/../../scripts_common.sh"

# Helper used below to extract values from XML documents.
XPATH="$DRIVER_PATH/../../datastore/xpath.rb"

# Print the installed qemu-img version as one integer so it can be compared
# numerically: major, then minor and patch zero-padded to three digits each
# (for example 2.10.0 is printed as 2010000).
# Outputs: the encoded version on stdout; nothing when qemu-img prints nothing.
get_qemu_img_version() {
    # Third word of the first line of "qemu-img --version" is the version
    # token; the first character that is not a digit or dot is dropped before
    # the token is split on dots.
    qemu-img --version \
        | awk 'NR == 1 { print $3; exit }' \
        | sed -e 's/[^0-9\.]//' \
        | awk -F. '{ printf("%d%03d%03d\n", $1,$2,$3); }'
}

# Tell whether a file-backed disk of a domain is attached read-only.
#
# Globals:   LIBVIRT_URI (read), XPATH (read)
# Arguments: $1 - libvirt domain name
#            $2 - disk source file path as it appears in the domain XML
# Returns:   0 when the disk element selected by the source file contains a
#            <readonly/> child, non-zero otherwise.
is_readonly() {
    local DOMAIN=$1
    local DISK=$2
    # Kept local so the intermediate XML does not leak into the caller's scope.
    local READ_ONLY

    READ_ONLY=$(virsh --connect "$LIBVIRT_URI" dumpxml "$DOMAIN" | \
            "$XPATH" --stdin --subtree \
            "//domain/devices/disk[source/@file='$DISK']/readonly")

    # The right-hand side is quoted, so this is a literal substring match.
    [[ "$READ_ONLY" =~ '<readonly/>' ]]
}

# Report the virtual size and the image format of a disk.
#
# Arguments:
#   $1 - path of the disk image (or symlink) on the local host
#   $2 - extra option(s) for "qemu-img info"; may be empty
# Outputs:
#   stdout - "<virtual-size> <format>" as parsed from the JSON printed by
#            qemu-img, or "unknown qcow2-symlink" / "unknown network-disk"
#            when the path is a symlink (symlinks are never inspected)
#   stderr - a diagnostic when the image cannot be inspected, so that error
#            text is never mistaken for a size/format pair by the caller
# Returns:
#   0 on success. On failure it calls "exit 1", which ends the current shell;
#   when invoked inside a command substitution only that subshell ends.
get_size_and_format_of_disk_img() {
    local QEMU_IMG_PATH="$1"
    local PARAM="$2"
    local TARGET IMG_INFO FLAT_INFO SIZE FORMAT

    if [ -L "$QEMU_IMG_PATH" ]; then
        TARGET=$(readlink "$QEMU_IMG_PATH")

        # symlink to disk.X.snap/base.1
        if [[ "$TARGET" =~ disk.[0-9]*.snap/base.1  ]]; then
            echo unknown qcow2-symlink
            return
        fi

        # any other symlink, assume network disk
        echo unknown network-disk
        return
    fi

    # $PARAM is intentionally unquoted: it disappears when empty and may
    # carry more than one option.
    # shellcheck disable=SC2086
    IMG_INFO=$(qemu-img info $PARAM "$QEMU_IMG_PATH" --output json)

    if [ -z "$IMG_INFO" ]; then
        echo "Failed to get image info for $QEMU_IMG_PATH" >&2
        exit 1
    fi

    # Collapse the JSON onto one line so the greedy sed expressions below see
    # the whole document, without exposing it to pathname expansion.
    FLAT_INFO=$(printf '%s' "$IMG_INFO" | tr -s '[:space:]' ' ')

    SIZE=$(sed -nE 's/^.*virtual-size.: ([0-9]+).*/\1/p' <<<"$FLAT_INFO")
    FORMAT=$(sed -nE 's/^.*format.: "([a-z0-9]+)".*/\1/p' <<<"$FLAT_INFO")

    if [ -z "$SIZE" ] || [ -z "$FORMAT" ]; then
        echo "Failed to get image $QEMU_IMG_PATH size or format" >&2
        exit 1
    fi

    echo "$SIZE $FORMAT"
}

# Create an empty qcow2 image on the destination host.
#
# Arguments: $1 - destination host
#            $2 - path of the image to create
#            $3 - size handed to "qemu-img create"
# Returns:   the status of ssh_monitor_and_log.
create_target_disk_img() {
    local target_host=$1
    local image_path="$2"
    local image_size="$3"

    local create_cmd="qemu-img create -f qcow2 '$image_path' '$image_size'"
    local failure_msg="Failed to create new qcow image for $image_path"

    ssh_monitor_and_log "$target_host" "$create_cmd" "$failure_msg"
}

# Positional arguments: the deploy id of the domain, the destination host and
# a third host name that is only used to build the libvirt URI in BREST mode.
DEPLOY_ID=$1
DEST_HOST=$2
host=$3

# The driver action document arrives on standard input.
STDIN=$(cat -)

#rbt: brest3
# If the input, with newlines removed, does not look like a single "<...>"
# string it is treated as base64 and replaced by its decoded form.
if ! echo "$STDIN" | tr -d '\n' | grep -q '^ *<.*> *$'; then
    STDIN="$(echo "$STDIN" | base64 -d)"
fi

#rbt
# In BREST mode libvirt is reached over ssh on the host given as third argument.
case "$ONE_BREST_MODE" in
    1) LIBVIRT_URI=qemu+ssh://$host/system ;;
esac

# List the domain's block devices as "<first column>,<second column>" of the
# "virsh domblklist" table; the first two output lines and empty lines are
# skipped.
DISKS=$(virsh --connect "$LIBVIRT_URI" domblklist "$DEPLOY_ID" \
    | tail -n+3 | grep -v "^$" | awk '{print $1 "," $2}')

# Extract the values needed from the driver action document. The xpath
# helper's output is consumed as NUL-terminated values, in the order the
# expressions are given. The document is passed quoted so that it is not
# word-split or glob-expanded by the shell on its way to the helper.
unset i j XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS[i++]="$element"
done < <(printf '%s\n' "$STDIN" | "$XPATH" \
            /VMM_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/SHARED \
            /VMM_DRIVER_ACTION_DATA/DISK_TARGET_PATH)

SHARED="${XPATH_ELEMENTS[j++]}"
VM_DIR="${XPATH_ELEMENTS[j++]}"

# use "force-share" param for qemu >= 2.10
[ "$(get_qemu_img_version)" -ge 2010000 ] && QEMU_IMG_PARAM="-U"

# Domain snapshots prevent the migration, so every snapshot name reported by
# libvirt is deleted (with the --metadata flag) before going any further.
SNAPS=$(
    monitor_and_log \
        "virsh --connect $LIBVIRT_URI snapshot-list $DEPLOY_ID --name 2>/dev/null" \
        "Failed to get snapshots for $DEPLOY_ID"
)

for SNAP in $SNAPS; do
    exec_and_log \
        "virsh --connect $LIBVIRT_URI snapshot-delete $DEPLOY_ID --snapshotname $SNAP --metadata" \
        "Failed to delete snapshot $SNAP from $DEPLOY_ID"
done

# When CLEANUP_MEMORY_ON_START is "yes", drop caches and compact memory on
# the destination host before the domain arrives.
case "$CLEANUP_MEMORY_ON_START" in
    yes)
        ssh_exec_and_log "$DEST_HOST" \
            "sudo -n sysctl vm.drop_caches=3 vm.compact_memory=1 >/dev/null" \
            "Failed compact memory on $DEST_HOST"
        ;;
esac

# Live-migrate the domain to $DEST_HOST.
#   SHARED = YES : the migrate command is run directly, nothing is copied.
#   otherwise    : disk images are created/copied on the destination first
#                  and the migrate command is given --copy-storage-all for
#                  the disks collected in MIGRATE_DISKS.
# RC receives the exit status of retry_if and is examined after this block.
# NOTE(review): retry_if comes from scripts_common.sh; presumably it re-runs
# the command (3 tries, 5 s apart) when its output mentions "active block
# job" - confirm against the helper.
#rbt: add --inherit-owner and parallel
if [ "$SHARED" = "YES" ]; then
    retry_if "active block job" 3 5 virsh --connect $LIBVIRT_URI migrate \
        --live --inherit-owner --parallel $MIGRATE_OPTIONS $DEPLOY_ID $QEMU_PROTOCOL://$DEST_HOST/system

    RC=$?
else
    if [[ -z "$DISKS" ]]; then
        error_message "No disks discovered on the VM"
        exit 1
    fi

    ssh_monitor_and_log "$DEST_HOST" "mkdir -p '$VM_DIR'" \
        "Failed to make remote directory $VM_DIR image"

    # Both lists are only ever appended to below, so start from empty values
    # instead of whatever the environment may carry.
    MIGRATE_DISKS=""
    RAW_DISKS=""

    for DISK_STR in $DISKS; do

        # DISK_STR is "<device>,<path>" as built from domblklist
        DISK_DEV=${DISK_STR/,*/}
        DISK_PATH=${DISK_STR/*,/}

        # The helper runs in a subshell; when it fails SIZE/FORMAT match none
        # of the branches below and the disk is left out of the storage copy.
        read -r SIZE FORMAT <<<"$(get_size_and_format_of_disk_img "$DISK_PATH" "$QEMU_IMG_PARAM")"

        if [ "$FORMAT" = "raw" ]; then
            # only writable raw disks are re-synced later and storage-migrated
            if ! is_readonly "$DEPLOY_ID" "$DISK_PATH"; then
                RAW_DISKS+=" $DISK_PATH"
                MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"
            fi

            # do initial copy (sparse tar stream over ssh), read-only disks included
            multiline_exec_and_log "tar -cSf - $DISK_PATH | $SSH $DEST_HOST 'tar xSf - -C / '" \
                "Failed to rsync disk $DISK_PATH to $DEST_HOST:$DISK_PATH"

        elif [ "$FORMAT" = "qcow2" ]; then
            create_target_disk_img "$DEST_HOST" "$DISK_PATH" "$SIZE"
            MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"

        elif [ "$FORMAT" = "qcow2-symlink" ]; then
            # don't create disk, .snap dir will be copied anyway
            MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"

        elif [ "$FORMAT" = "network-disk" ]; then
            true # skip
        fi

        # copy disk snapshots
        if [ -d "${DISK_PATH}.snap" ] || [ -L "${DISK_PATH}.snap" ]; then
            multiline_exec_and_log "tar -cSf - $DISK_PATH.snap | $SSH $DEST_HOST 'tar xSf - -C / '" \
                "Failed to rsync disk snapshot ${DISK_PATH}.snap to $DEST_HOST"
        fi

        # recreate symlinks
        if [ -L "$DISK_PATH" ]; then
            LINK_TARGET=$(readlink "$DISK_PATH")
            # The trailing backslash matters: the error message is the third
            # argument of ssh_exec_and_log, not a command of its own.
            ssh_exec_and_log "$DEST_HOST" "[ -L \"$DISK_PATH\" ] || ln -s \"$LINK_TARGET\" \"$DISK_PATH\"" \
                "Failed to create symlink $DISK_PATH -> $LINK_TARGET on $DEST_HOST"
        fi
    done


    # freeze/suspend domain and copy raw disks again
    if [ -n "$RAW_DISKS" ]; then
        if virsh --connect $LIBVIRT_URI domfsfreeze $DEPLOY_ID; then
            # local domfsthaw for the case migration fails
            trap "virsh --connect $LIBVIRT_URI domfsthaw $DEPLOY_ID" EXIT
            FREEZE="yes"
        else
            # filesystem freeze failed, fall back to suspending the domain
            if virsh --connect $LIBVIRT_URI suspend $DEPLOY_ID; then
                # local resume for the case migration fails
                trap "virsh --connect $LIBVIRT_URI resume $DEPLOY_ID" EXIT
                SUSPEND="yes"
            else
                error_message "Could not freeze or suspend the domain"
                exit 1
            fi
        fi

        for DISK in $RAW_DISKS; do
            multiline_exec_and_log "tar -cSf - $DISK | $SSH $DEST_HOST 'tar xSf - -C / '" \
                "Failed to rsync disk $DISK to $DEST_HOST:$DISK"
        done
    fi

    # Enumerate disks to copy
    if [ -n "$MIGRATE_DISKS" ]; then
        DISK_OPTS="--copy-storage-all --migrate-disks ${MIGRATE_DISKS}"
    fi

    #rbt: add --inherit-owner and parallel
    retry_if "active block job" 3 5 \
        virsh --connect $LIBVIRT_URI migrate \
        --live --inherit-owner --parallel $MIGRATE_OPTIONS $DEPLOY_ID $QEMU_PROTOCOL://$DEST_HOST/system \
        $DISK_OPTS
    RC=$?

    # remote domfsthaw/resume, give it time: up to 5 attempts, 2 s apart
    if [ $RC -eq 0 ]; then
        if [ "$FREEZE" = "yes" ]; then
            for I in $(seq 5); do
                virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system domfsthaw $DEPLOY_ID \
                    && break
                sleep 2
            done
        elif [ "$SUSPEND" = "yes" ]; then
            for I in $(seq 5); do
                virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system resume $DEPLOY_ID \
                    && break
                sleep 2
            done
        fi
    fi
fi

# cleanup target host in case of error: remove whatever the failed migration
# left defined on the destination, then abort with the migration's status
if [ "$RC" -ne 0 ]; then
    # best effort, output and status of both operations are ignored
    for CLEAN_OP in destroy undefine; do
        virsh --connect "$QEMU_PROTOCOL://$DEST_HOST/system" "${CLEAN_OP}" "$DEPLOY_ID" >/dev/null 2>&1
    done

    #rbt: BREST-1887
    # Without shared storage the VM directory on the destination was created
    # by this script; remove it. The path is quoted for the remote shell the
    # same way the earlier "mkdir -p" does, and an empty VM_DIR is skipped so
    # "rm -rf" is never sent without an operand.
    if [ "$SHARED" != "YES" ] && [ -n "$VM_DIR" ]; then
        ssh "$DEST_HOST" "rm -rf -- '$VM_DIR'"
    fi

    error_message "Could not migrate $DEPLOY_ID to $DEST_HOST"
    exit $RC
fi

# rbt: BREST-1260
# redefine potential snapshots after live migration
#
# Only done when SHARED is YES. A small shell script is assembled in
# SNAP_REDEFINE_CMD and handed to ssh_exec_and_log for $DEST_HOST. For every
# snap-*.xml file found in $VM_DIR (listed with "ls -v") the script rewrites
# the <uuid> element to the UUID that "virsh dominfo" reports for the domain
# and feeds the file to "virsh snapshot-create --redefine"; a failing
# redefine is ignored ("|| true").
if [ "$SHARED" = "YES" ]; then
# The heredoc delimiter is unquoted: $DEPLOY_ID and $VM_DIR are expanded here,
# while the \$-escaped expansions stay literal and are evaluated by the shell
# that runs the generated script. The lines up to EOF are script text, not
# comments of this file.
SNAP_REDEFINE_CMD=$(cat <<EOF
    UUID=\$(virsh --connect qemu:///system dominfo $DEPLOY_ID | awk '/UUID:/ {print \$2}')
    DISK_DIR=$VM_DIR
    for SNAPSHOT_MD_XML in \$(ls -v \${DISK_DIR}/snap-*.xml 2>/dev/null); do
        # replace uuid in the snapshot metadata xml
        sed -i "s%<uuid>[[:alnum:]-]*</uuid>%<uuid>\$UUID</uuid>%" \$SNAPSHOT_MD_XML

        # redefine the snapshot using the xml metadata file
        virsh --connect qemu:///system snapshot-create $DEPLOY_ID \$SNAPSHOT_MD_XML --redefine > /dev/null || true
    done
EOF
)
    ssh_exec_and_log $DEST_HOST "$SNAP_REDEFINE_CMD" \
        "Error redefine snapshots"
fi

# Synchronize the VM time in the background on the remote host
#
# Only done when SYNC_TIME is "yes". The generated script is a backgrounded
# subshell with all output discarded. It makes up to 4 attempts: as long as
# "virsh dominfo" succeeds it tries "virsh domtime --sync" and stops on the
# first success, sleeping 5 seconds between attempts; it stops immediately
# when dominfo fails.
# NOTE(review): $LIBVIRT_URI is expanded on this host while the text is
# built, so the remote commands use the local URI value (in BREST mode the
# qemu+ssh URI built from the third argument) - confirm this is intended.
if [ "$SYNC_TIME" = "yes" ]; then
    # unquoted delimiter: only the \$-escaped expansions are left for the
    # shell that runs the script
    SYNC_TIME_CMD=$(cat <<EOF
(
for I in \$(seq 4 -1 1); do
    if virsh --connect $LIBVIRT_URI --readonly dominfo $DEPLOY_ID; then
        virsh --connect $LIBVIRT_URI domtime --sync $DEPLOY_ID && exit
        [ "\$I" -gt 1 ] && sleep 5
    else
        exit
    fi
done
) &>/dev/null &
EOF
)
    # NOTE(review): judging by its name the helper does not abort the driver
    # on failure - confirm in scripts_common.sh
    ssh_exec_and_log_no_error "${DEST_HOST}" \
        "${SYNC_TIME_CMD}" \
        "Failed to synchronize VM time"
fi

# When CLEANUP_MEMORY_ON_STOP is "yes", drop caches and compact memory on
# this host; the command runs in the background with its output discarded
# and its result is not checked.
case "$CLEANUP_MEMORY_ON_STOP" in
    yes)
        sudo -n sysctl vm.drop_caches=3 vm.compact_memory=1 &>/dev/null &
        ;;
esac
