#!/usr/bin/env python3
#
# Copyright (c) Greenplum Inc 2008. All Rights Reserved.
#
#
# THIS IMPORT MUST COME FIRST
#
# import mainUtils FIRST to get python version check
from gppylib.mainUtils import *

import os, sys
import subprocess
import signal
import time
import pprint
from optparse import Option, OptionGroup, OptionParser, OptionValueError, SUPPRESS_USAGE
import socket
from gppylib.mainUtils import parseStatusLine

try:
    from gppylib.gpparseopts import OptParser, OptChecker
    from gppylib.gplog import *
    from gppylib.db import dbconn
    from gppylib.db import catalog
    from gppylib.gparray import *
    from gppylib import userinput
    from gppylib import pgconf
    from gppylib.commands import unix
    from gppylib.commands import gp
    from gppylib.commands.gp import SEGMENT_STOP_TIMEOUT_DEFAULT, SegmentStop, GpSegStopCmd
    from gppylib.commands import base
    from gppylib.commands import pg
    from gppylib.commands import dca
    from gppylib.utils import TableLogger
    from gppylib.gp_era import GpEraFile
    from gppylib.operations.utils import ParallelOperation, RemoteOperation
    from gppylib.operations.rebalanceSegments import ReconfigDetectionSQLQueryCommand
    from gppylib.operations.detect_unreachable_hosts import get_unreachable_segment_hosts
except ImportError as e:
    sys.exit('ERROR: Cannot import modules.  Please check that you have sourced greenplum_path.sh.  Detail: ' + str(e))

DEFAULT_NUM_WORKERS = 64
logger = get_default_logger()

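# `pg_ctl status` exit codes (per the PostgreSQL docs): 0 means the server is
# running; 3 means no server is running in the data directory.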
PG_CTL_STATUS_RUNNING = 0
PG_CTL_STATUS_STOPPED = 3
PG_CTL_STATUS_SLEEP_INTERVAL = 1.0  # number of seconds to sleep before issuing next pg_ctl status


# ---------------------------------------------------------------
class SegStopStatus:
    """Tracks result of trying to stop an individual segment database"""

    def __init__(self, db, stopped=False, reason=None, failedCmd=None, timedOut=False):
        self.db = db
        self.stopped = stopped
        self.reason = reason
        self.failedCmd = failedCmd
        self.timedOut = timedOut

    def __str__(self):
        if self.stopped:
            return "DBID:%d  STOPPED" % self.db.dbid
        else:
            return "DBID:%d  FAILED  host:'%s' datadir:'%s' with reason:'%s'" % (
            self.db.dbid, self.db.hostname, self.db.datadir, self.reason)


def print_progress(pool, interval=10):
    """
    Waits for a WorkerPool to complete, printing a progress percentage marker
    once at the beginning of the call, and thereafter at the provided interval
    (default ten seconds). A final 100% marker is printed upon completion.
    """
    def print_completed_percentage():
        # pool.completed can change asynchronously; save its value.
        completed = pool.completed

        pct = 0
        if pool.assigned:
            pct = float(completed) / pool.assigned

        pool.logger.info('%0.2f%% of jobs completed' % (pct * 100))
        return completed >= pool.assigned

    # print_completed_percentage() returns True if we're done.
    while not print_completed_percentage():
        pool.join(interval)


# ---------------------------------------------------------------
class GpStop:
    ######
    def __init__(self, mode, coordinator_datadir=None,
                 parallel=DEFAULT_NUM_WORKERS, quiet=False, coordinatoronly=False, sighup=False,
                 interactive=False, stopstandby=False, restart=False,
                 timeout=SEGMENT_STOP_TIMEOUT_DEFAULT, logfileDirectory=False, onlyThisHost=None,
                 fts_hosts=None, etcd_hosts=None, is_external_fts=False):
        self.mode = mode
        self.coordinator_datadir = coordinator_datadir
        self.pool = None
        self.parallel = parallel
        self.quiet = quiet
        self.pid = 0
        self.coordinatoronly = coordinatoronly
        self.sighup = sighup
        self.interactive = interactive
        self.stopstandby = stopstandby
        self.restart = restart
        self.hadFailures = False
        self.timeout = timeout
        self.logfileDirectory = logfileDirectory
        self.onlyThisHost = onlyThisHost
        self.fts_hosts = fts_hosts
        self.etcd_hosts = etcd_hosts
        # some variables that will be assigned during run()
        self.gphome = None
        self.port = None
        self.dburl = None
        self.conn = None
        self.gparray = None
        self.gpversion = None
        self.is_external_fts = is_external_fts

        logger.debug("Setting level of parallelism to: %d" % self.parallel)
        pass


    #####
    def _find_any_primary_and_mirror_on_same_host(self, segs):
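        """
        Return a (primary, mirror) pair from segs that share a content id, or
        (None, None) if no content id appears twice. Callers pass the segments
        of a single host, so a match means that host holds both halves of a
        mirror pair.
        """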
        map_content_id_to_segment = {}
        for seg in segs:
            seg_content_id = seg.getSegmentContentId()
            if seg_content_id in map_content_id_to_segment:
                if seg.isSegmentPrimary(True):
                    return seg, map_content_id_to_segment[seg_content_id]
                else:
                    return map_content_id_to_segment[seg_content_id], seg
            else:
                map_content_id_to_segment[seg_content_id] = seg

        return None, None

    def _filter_segments_for_single_host_stop(self):
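        """
        Return the 'up' segments on self.onlyThisHost, after verifying that the
        host does not hold both a primary and its corresponding mirror and that
        every segment is synchronized; stopping the host would otherwise make
        data unavailable.
        """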
        host_to_segments_mapping = self.gparray.getSegmentsByHostName(self.gparray.getSegDbList())
        segments_on_host = host_to_segments_mapping[self.onlyThisHost]
        up_segments_on_host = [seg for seg in segments_on_host if seg.isSegmentUp()]
        matching_primary, matching_mirror = self._find_any_primary_and_mirror_on_same_host(up_segments_on_host)
        if matching_primary and matching_mirror:
            raise Exception("Segment host '%s' has both a primary '%s' and its "
                            "corresponding mirror '%s'. Aborting." % (matching_primary.getSegmentHostName(),
                                                                      matching_primary, matching_mirror))

        for seg in up_segments_on_host:
            if seg.isSegmentModeSynchronized():
                continue
            raise Exception("Segment '%s' not synchronized. Aborting." % seg)

        return up_segments_on_host


    def run(self):
        """
        Run and return the exitCode for the program
        """
        self._prepare()

        if self.gparray.get_segment_count() == 0:
            self.coordinatoronly = True

        if self.coordinatoronly:
            if self.sighup:
                return self._sighup_cluster()
            if self.interactive:
                if not userinput.ask_yesno(None, "\nContinue with coordinator-only shutdown", 'N'):
                    raise UserAbortedException()
            try:
                # Disable Ctrl-C
                signal.signal(signal.SIGINT, signal.SIG_IGN)

                self._stop_coordinator()
            finally:
                # Reenable Ctrl-C
                self.cleanup()
                signal.signal(signal.SIGINT, signal.default_int_handler)
            if self.is_external_fts:
                self._clean_fts()
                if self.etcd_hosts is not None:
                    self._clean_etcd()
            if self.restart:
                logger.info("Restarting System...")
                gp.NewGpStart.local('restarting system', verbose=logging_is_verbose(), coordinatorOnly=self.coordinatoronly,
                                    nostandby=not self.stopstandby, coordinatorDirectory=self.coordinator_datadir)
        else:
            if self.sighup:
                return self._sighup_cluster()
            else:

                segs = []
                if self.onlyThisHost:
                    if self.onlyThisHost in self.gparray.get_coordinator_host_names():
                        raise Exception("Specified host '%s' has the coordinator or standby coordinator on it. This node can only be stopped as part of a full-cluster gpstop, without '--host'." %
                                        self.onlyThisHost)
                    if not self.gparray.hasMirrors:
                        raise Exception("Cannot perform host-specific gpstop on a cluster without segment mirroring.")
                    segs = self._filter_segments_for_single_host_stop()
                else:
                    segs = self.gparray.getSegDbList()

                if self.interactive:
                    self._summarize_actions(segs)
                    if not userinput.ask_yesno(None, "\nContinue with Cloudberry instance shutdown", 'N'):
                        raise UserAbortedException()

                try:
                    # Disable Ctrl-C
                    signal.signal(signal.SIGINT, signal.SIG_IGN)

                    if self.onlyThisHost is None:
                        self._stop_coordinator()
                        self._stop_standby()
                    self._stop_segments(segs)
                finally:
                    # Reenable Ctrl-C
                    self.cleanup()
                    signal.signal(signal.SIGINT, signal.default_int_handler)

                if self.onlyThisHost:
                    logger.info("Recognizing new cluster state...")
                    try:
                        # currently responsible for triggering an update to gp_segment_configuration
                        # because dbconn.connect() internally calls commit()
                        self.conn = dbconn.connect(self.dburl)

                        # backup in case connect() does not do a commit
                        ReconfigDetectionSQLQueryCommand(self.conn).run()
                    except Exception as e:
                        logger.debug('query trying to start a transaction failed: %s' % str(e))
                        logger.debug('expected: the purpose was that by attempting a transaction, gp_segment_configuration would be updated')
                    finally:
                        if self.conn:
                            self.conn.close()
                if self.is_external_fts:
                    self._clean_fts()
                    if self.etcd_hosts is not None:
                        self._clean_etcd()

                if self.restart:
                    logger.info("Restarting System...")
                    gp.NewGpStart.local('restarting system', verbose=logging_is_verbose(),
                                        nostandby=not self.stopstandby, coordinatorDirectory=self.coordinator_datadir)
                else:
                    if dca.is_dca_appliance():
                        logger.info("Unregistering with DCA")
                        dca.DcaGpdbStopped.local()
                        logger.info("Unregistered with DCA")

                if self.hadFailures:
                    # MPP-15208
                    return 2
        return 0
    
    def _clean_fts(self):
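        """
        Kill the external FTS processes on every host listed in the
        self.fts_hosts file, then verify after a short delay that none of them
        survived.
        """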
        if not os.path.exists(self.fts_hosts):
            raise gp.GpError("FTS host file %s does not exist." % self.fts_hosts)
        fts_host_machine_list = read_hosts(self.fts_hosts)
        logger.info("Start to kill FTS processes...")
        for fts in fts_host_machine_list:
            kill_fts(fts)
        time.sleep(5)
        for fts in fts_host_machine_list:
            if check_fts(fts):
                raise gp.GpError("FTS process on %s is still alive" % fts)
        logger.info("kill all FTS processes successfully.")
        

    def _clean_etcd(self):
        if not os.path.exists(self.etcd_hosts):
            raise gp.GpError("ETCD host file %s does not exist." % self.etcd_hosts)
        etcd_host_machine_list = read_hosts(self.etcd_hosts)
        logger.info("Start to kill ETCD processes...")
        for etcd in etcd_host_machine_list:
            kill_etcd(etcd)
        
        time.sleep(60)
        for etcd in etcd_host_machine_list:
            if check_etcd(etcd):
                raise gp.GpError("ETCD process on %s is still alive" % etcd)
        logger.info("kill all ETCD processes successfully.")


    ######
    def cleanup(self):
        if self.pool:
            self.pool.haltWork()

    ######
    def _prepare(self):
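        """
        Gather and validate everything the shutdown needs: GPHOME, the
        coordinator data directory and port, a postmaster.pid sanity check, the
        catalog-derived GpArray, and (with --host) hostname validation.
        """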
        logger.info("Gathering information and validating the environment...")
        self.gphome = gp.get_gphome()
        if self.coordinator_datadir is None:
            self.coordinator_datadir = gp.get_coordinatordatadir()
        self.user = unix.getUserName()
        gp.check_permissions(self.user)
        self._read_postgresqlconf()
        self._check_db_running()
        self._build_gparray()
        if self.onlyThisHost:
            self._is_hostname_valid()
        self._check_version()

    ######
    def _is_hostname_valid(self):
        segments = self.gparray.getSegmentsByHostName(self.gparray.getDbList())
        host_names = list(segments.keys())
        if self.onlyThisHost not in host_names:
            logger.error("host '%s' is not found in gp_segment_configuration" % self.onlyThisHost)
            logger.error("hosts in cluster config: %s" % host_names)
            raise SystemExit(1)

    ######
    def _check_version(self):
        self.gpversion = gp.GpVersion.local('local Cloudberry version check', self.gphome)
        logger.info("Cloudberry Version: '%s'" % self.gpversion)

    ######
    def _read_postgresqlconf(self):
        logger.debug("Obtaining coordinator's port from coordinator data directory")
        pgconf_dict = pgconf.readfile(self.coordinator_datadir + "/postgresql.conf")
        self.port = pgconf_dict.int('port')
        logger.debug("Read from postgresql.conf port=%s" % self.port)


    ######

    def _check_db_running(self):
        if os.path.exists(self.coordinator_datadir + '/postmaster.pid'):
            self.pid = gp.read_postmaster_pidfile(self.coordinator_datadir)
            if not unix.check_pid(self.pid):
                logger.warning("Have a postmaster.pid file but no Coordinator segment process running")
                logger.info("Clearing postmaster.pid file and /tmp lock files")

                lockfile = "/tmp/.s.PGSQL.%s" % self.port
                logger.info("Clearing Coordinator instance lock files")
                os.remove(lockfile)

                logger.info("Clearing Coordinator instance pid file")
                os.remove("%s/postmaster.pid" % self.coordinator_datadir)

                logger.info("Setting recovery parameters")
                self.mode = 'fast'
                logger.info("Commencing forced shutdown")
        else:
            raise ExceptionNoStackTraceNeeded(
                'postmaster.pid file does not exist.  Is the Cloudberry instance already stopped?')

    ######
    def _build_gparray(self):
        logger.info("Obtaining Cloudberry Coordinator catalog information")

        logger.info("Obtaining Segment details from coordinator...")
        self.dburl = dbconn.DbURL(port=self.port, dbname='template1')
        self.gparray = GpArray.initFromCatalog(self.dburl, utility=True)

    class _SigIntHandler(object):
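        """
        Context manager that records SIGINT instead of raising it while a
        pg_ctl status poll is in flight. Rough usage, mirroring
        _stop_coordinator_smart() below:

            with GpStop._SigIntHandler() as handler:
                handler.enable()      # window in which Ctrl-C is recorded
                time.sleep(...)
                handler.disable()     # back to ignoring SIGINT
                if handler.interrupted:
                    ...               # user asked to escalate the shutdown
        """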
        def __init__(self):
            self.interrupted = False

        def _handler(self, signum, stack_frame):
            self.interrupted = True

        def enable(self):
            self.interrupted = False
            signal.signal(signal.SIGINT, self._handler)

        def disable(self):
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        def __enter__(self):
            self._prev_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)

            # Try to prevent easy mistakes by ensuring that SIGINT was
            # ignored before we entered the manager.
            if self._prev_handler != signal.SIG_IGN:
                signal.signal(signal.SIGINT, self._prev_handler)
                raise Exception("SIGINT disposition must be SIG_IGN before entering SigIntHandler")
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            # Put back the previous handler.
            signal.signal(signal.SIGINT, self._prev_handler)
            return False


    def _is_coordinator_stopped(self):
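        """
        Return True if `pg_ctl status` reports the coordinator stopped and
        False if it is running; any other exit code raises an exception.
        """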
        with open(os.devnull, 'w') as devnull:
            ret = subprocess.call(['pg_ctl', 'status', '-D', self.coordinator_datadir],
                                  stdout=devnull)
        if ret == PG_CTL_STATUS_STOPPED:
            return True
        if ret != PG_CTL_STATUS_RUNNING:
            raise Exception('pg_ctl status failed with return code %d' % ret)
        return False



    # Send a no-wait stop to the coordinator, and then poll for it to stop.  If the user gets impatient or
    # the operation times out, show the connections and allow the user to upgrade the stop to fast or
    # immediate mode.
    def _stop_coordinator_smart(self):
        logger.info("Stopping coordinator segment and waiting for user connections to finish ...")

        self.conn = dbconn.connect(self.dburl, utility=True)
        user_connections_header, user_connections = catalog.getUserConnectionInfo(self.conn)
        self.conn.close()

        ret = subprocess.call(['pg_ctl', 'stop', '-W', '-m', 'smart', '-D', self.coordinator_datadir])
        if ret:
            raise Exception('pg_ctl stop smart failed')

        time_waited = 0
        wait_forever = False
        with GpStop._SigIntHandler() as handler:
            while not self._is_coordinator_stopped():
                # time.sleep() handles signals safely; on Python 3.5+ (PEP 475) it resumes
                # after the handler returns (https://docs.python.org/3/library/time.html#time.sleep)
                handler.enable()
                time.sleep(PG_CTL_STATUS_SLEEP_INTERVAL)
                time_waited = time_waited + PG_CTL_STATUS_SLEEP_INTERVAL
                handler.disable()

                if handler.interrupted or (time_waited >= self.timeout and not wait_forever):
                    handler.interrupted = False

                    warn_text = """Smart mode failed to shutdown the coordinator.\n"""
                    if len(user_connections):
                        warn_text += """  There were {} user connections at the start of the shutdown. Note: These connections may be outdated.
  The following connections were found before shutting down:\n""".format(len(user_connections))
                        for user_conn in user_connections:
                            warn_text += "  %s\n" % pprint.pformat(user_conn)

                    logger.warning(warn_text)
                    logger.info("Continue waiting in smart mode, stop in fast mode, or stop in immediate mode.")
                    mode_selection = userinput.ask_string("Your_choice",
                                                          "['(s)mart_mode', '(f)ast_mode', '(i)mmediate_mode']",
                                                          's',
                                                          ['s', 'f', 'i'])
                    logger.info("Your choice was '%s'" % mode_selection)
                    if mode_selection == 'f':
                        self.mode = 'fast'
                        return False
                    elif mode_selection == 'i':
                        self.mode = 'immediate'
                        return False
                    else:
                        wait_forever = True
                        logger.info("Continuing waiting for user connections to finish ...")


    ######
    # NOTE: we must make sure the coordinator is down before leaving this routine
    def _stop_coordinator(self, coordinatorOnly=False):
        ''' shuts down the coordinator '''

        logger.info("Commencing Coordinator instance shutdown with mode='%s'" % self.mode)
        logger.info("Coordinator segment instance directory=%s" % self.coordinator_datadir)

        e = GpEraFile(self.coordinator_datadir, logger=get_logger_if_verbose())
        e.end_era()

        # Get a list of postgres processes running before stopping the server
        postgres_pids = gp.get_postgres_segment_processes(self.coordinator_datadir, self.gparray.coordinator.hostname)
        logger.debug("Postgres processes running on coordinator host: {0}".format(postgres_pids))

        try:
            if self.mode == 'smart':
                self._stop_coordinator_smart()

            # NOTE: _stop_coordinator_smart() can change self.mode
            if self.mode != 'smart':
                cmd = gp.CoordinatorStop("stopping coordinator", self.coordinator_datadir, mode=self.mode, timeout=self.timeout)
                cmd.run(validateAfter=True)
        except:
            # Didn't stop in timeout or pg_ctl failed.  So try kill
            (succeeded, mypid, file_datadir) = pg.ReadPostmasterTempFile.local("Read coordinator tmp file",
                                                                               self.dburl.pgport).getResults()
            if succeeded and file_datadir == self.coordinator_datadir:
                if unix.check_pid(mypid):
                    logger.info("Failed to shutdown coordinator with pg_ctl.")
                    logger.info("Sending SIGQUIT signal...")
                    os.kill(mypid, signal.SIGQUIT)
                    time.sleep(5)

                    # Still not gone... try SIGABRT
                    if unix.check_pid(mypid):
                        logger.info("Sending SIGABRT signal...")
                        os.kill(mypid, signal.SIGABRT)
                        time.sleep(5)

                    if not unix.check_pid(mypid):
                        # Clean up files
                        lockfile = "/tmp/.s.PGSQL.%s" % self.dburl.pgport
                        if os.path.exists(lockfile):
                            logger.info("Clearing segment instance lock files")
                            os.remove(lockfile)

        # Use the process list and make sure that all the processes are killed at the end
        logger.info('Attempting forceful termination of any leftover coordinator process')
        unix.kill_9_segment_processes(self.coordinator_datadir, postgres_pids, self.gparray.coordinator.hostname)

        logger.debug("Successfully shutdown the Coordinator instance in admin mode")

    ######
    def _stop_standby(self):
        """ assumes prepare() has been called """
        if not self.stopstandby:
            return True

        if self.gparray.standbyCoordinator:
            standby = self.gparray.standbyCoordinator

            if get_unreachable_segment_hosts([standby.hostname], 1):
                logger.warning("Standby is unreachable, skipping shutdown on standby")
                return True

            # Get a list of postgres processes running before stopping the server
            postgres_pids = gp.get_postgres_segment_processes(standby.datadir, standby.hostname)
            logger.debug("Postgres processes running on standby host: {0}".format(postgres_pids))

            logger.info("Stopping coordinator standby host %s mode=%s" % (standby.hostname, self.mode))
            try:
                cmd = SegmentStop("stopping coordinator standby",
                                     standby.datadir, mode=self.mode,
                                     timeout=self.timeout,
                                     ctxt=base.REMOTE,
                                     remoteHost=standby.hostname)
                cmd.run(validateAfter=True)
            except base.ExecutionError as e:
                logger.warning("Error occured while stopping the standby coordinator: %s" % e)

            if not pg.DbStatus.remote('checking status of standby coordinator instance', standby, standby.hostname):
                logger.info("Successfully shutdown standby process on %s" % standby.hostname)
                return True
            else:
                logger.warning("Process coordinator standby still running, will issue fast shutdown with immediate")
                try:
                    cmd = SegmentStop("stopping coordinator standby", standby.datadir, mode='immediate',
                                         timeout=self.timeout,
                                         ctxt=base.REMOTE, remoteHost=standby.hostname)
                    cmd.run(validateAfter=True)
                except base.ExecutionError as e:
                    logger.warning("Error occured while stopping the standby coordinator: %s" % e)

                if not pg.DbStatus.remote('checking status of standby coordinator instance', standby, standby.hostname):
                    logger.info("Successfully shutdown coordinator standby process on %s" % standby.hostname)
                    return True
                else:
                    logger.error('Failed to stop standby. Attempting forceful termination of standby process')

                    # Use the process list and make sure that all the processes are killed at the end
                    unix.kill_9_segment_processes(standby.datadir, postgres_pids, standby.hostname)

                    if not pg.DbStatus.remote('checking status of standby coordinator instance', standby, standby.hostname):
                        logger.info("Successfully shutdown coordinator standby process on %s" % standby.hostname)
                        return True
                    else:
                        logger.error("Unable to stop coordinator standby on host: %s" % standby.hostname)
                        return False
        else:
            logger.info("No standby coordinator host configured")
            return True

    ######
    def _stop_segments(self, segs):
        failed_seg_status = []
        workers = min(len(self.gparray.get_hostlist()), self.parallel)
        self.pool = base.WorkerPool(numWorkers=workers, logger=logger)

        logger.info("Targeting dbid %s for shutdown" % [seg.getSegmentDbId() for seg in segs])


        if self.gparray.hasMirrors:
            # stop primaries
            logger.info("Commencing parallel primary segment instance shutdown, please wait...")
            try:
                self._stopseg_cmds(True, False, segs=segs)
            finally:
                self.pool.join()
            primary_success_seg_status = self._process_segment_stop(failed_seg_status)

            # stop mirrors
            logger.info("Commencing parallel mirror segment instance shutdown, please wait...")
            try:
                self._stopseg_cmds(False, True, segs=segs)
            finally:
                self.pool.join()
            mirror_success_seg_status = self._process_segment_stop(failed_seg_status)

            success_seg_status = primary_success_seg_status + mirror_success_seg_status
            self._print_segment_stop(segs, failed_seg_status, success_seg_status)

        else:
            logger.info("Commencing parallel segment instance shutdown, please wait...")
            # There are no active-mirrors
            try:
                self._stopseg_cmds(True, False, segs=segs)
            finally:
                self.pool.join()
            success_seg_status = self._process_segment_stop(failed_seg_status)

            self._print_segment_stop(segs, failed_seg_status, success_seg_status)

    ######
    def _stopseg_cmds(self, includePrimaries, includeMirrors, segs):
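        """
        Group the given segments by host, keep primaries and/or mirrors as
        requested, and dispatch one GpSegStopCmd per host to the worker pool,
        reporting progress until the pool drains.
        """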
        host_segs_map = {}
        for seg in segs:
            host_segs_map.setdefault(seg.getSegmentHostName(), []).append(seg)

        for hostname, gpdb_objs in host_segs_map.items():
            dbs = []
            for db in gpdb_objs:
                role = db.getSegmentRole()
                if role == 'p' and includePrimaries:
                    dbs.append(db)
                elif role != 'p' and includeMirrors:
                    dbs.append(db)

            # If we have no dbs then we have no segments of the type primary
            # or mirror.  This will occur when you have an entire host fail
            # when using group mirroring.  This is because all the mirror segs
            # on the alive host will be marked primary (or vice-versa)
            if len(dbs) == 0:
                continue

            logger.debug("Dispatching command to shutdown %d segments on host: %s" % (len(dbs), hostname))
            cmd = GpSegStopCmd("remote segment stops on host '%s'" % hostname, self.gphome, self.gpversion,
                               mode=self.mode, dbs=dbs, timeout=self.timeout,
                               verbose=logging_is_verbose(), ctxt=base.REMOTE, remoteHost=hostname,
                               logfileDirectory=self.logfileDirectory)
            self.pool.addCommand(cmd)

        print_progress(self.pool)

    ######
    def _process_segment_stop(self, failed_seg_status):
        '''reviews results of gpsegstop commands '''
        success_seg_status = []
        seg_timed_out = False
        cmds = self.pool.getCompletedItems()

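        # gpsegstop is assumed to exit 0 on success and 1 on partial failure
        # while still emitting parseable STATUS lines; any other return code is
        # treated as a failure of every segment in that dispatch.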
        for cmd in cmds:
            if cmd.get_results().rc == 0 or cmd.get_results().rc == 1:
                cmdout = cmd.get_results().stdout
                lines = cmdout.split('\n')
                for line in lines:
                    if line.startswith("STATUS"):
                        reasonStr, started, datadir = parseStatusLine(line, isStop=True)[1:]

                        if started.lower() == 'false':
                            success = False
                        else:
                            success = True

                        for db in cmd.dblist:
                            if db.datadir == datadir:
                                if success:
                                    success_seg_status.append(
                                        SegStopStatus(db, stopped=True, reason=reasonStr, failedCmd=cmd))
                                else:
                                    # dbs that are marked invalid are 'skipped' but we dispatch to them
                                    # anyway since we want to try and shutdown any runaway pg processes.
                                    failed_seg_status.append(
                                        SegStopStatus(db, stopped=False, reason=reasonStr, failedCmd=cmd))

                    elif line.strip().startswith('stderr: pg_ctl: server does not shut down'):
                        # We are assuming that we know what segment failed beforehand.
                        if failed_seg_status:
                            failed_seg_status[-1].timedOut = True
                        else:
                            logger.debug("No failed segments to time out")
            else:
                for db in cmd.dblist:
                    # dbs that are marked invalid are 'skipped' but we dispatch to them
                    # anyway since we want to try and shutdown any runaway pg processes.
                    if db.valid:
                        failed_seg_status.append(
                            SegStopStatus(db, stopped=False, reason=cmd.get_results(), failedCmd=cmd))

        self.pool.empty_completed_items()
        return success_seg_status


    ######

    def _print_segment_stop(self, segs, failed_seg_status, success_seg_status):
        stopped = len(segs) - len(failed_seg_status)
        failed = len([x for x in failed_seg_status if x.db.valid])
        invalid = self.gparray.get_invalid_segdbs()
        total_segs = len(self.gparray.getSegDbList())
        timed_out = len([x for x in failed_seg_status if x.timedOut])

        if failed > 0 or logging_is_verbose():
            logger.info("------------------------------------------------")
            if logging_is_verbose():
                logger.info("Segment Stop Information")
            else:
                logger.info("Failed Segment Stop Information ")

            logger.info("------------------------------------------------")
            if failed > 0:
                for failure in failed_seg_status:
                    logger.info(failure)
            if logging_is_verbose():
                for stat in success_seg_status:
                    logger.debug(stat)

        tabLog = TableLogger(logger=logger).setWarnWithArrows(True)
        tabLog.addSeparator()
        tabLog.info(["Segments stopped successfully", "= %d" % stopped])
        tabLog.infoOrWarn(failed > 0, ["Segments with errors during stop", "= %d" % failed])
        if invalid:
            tabLog.info([])
            tabLog.warn(["Segments that are currently marked down in configuration", "= %d" % len(invalid)])
            tabLog.info(["         (stop was still attempted on these segments)"])
        tabLog.addSeparator()

        tabLog.outputTable()

        flag = "" if failed == 0 else "<<<<<<<<"
        logger.info("Successfully shutdown %d of %d segment instances %s" % (stopped, total_segs, flag))

        if failed > 0:
            self.hadFailures = True
            logger.warning("------------------------------------------------")
            logger.warning("Segment instance shutdown failures reported")
            logger.warning("Failed to shutdown %d of %d segment instances <<<<<" % (failed, total_segs))
            if timed_out > 0:
                logger.warning("%d segments did not complete their shutdown in the allowed" % timed_out)
                logger.warning("timeout of %d seconds.  These segments are still in the process" % self.timeout)
                logger.warning("of shutting down.  You will not be able to restart the database")
                logger.warning("until all processes have terminated.")
            logger.warning("A total of %d errors were encountered" % failed)
            logger.warning("Review logfile %s" % get_logfile())
            logger.warning("For more details on segment shutdown failure(s)")
            logger.warning("------------------------------------------------")
        else:
            self.hadFailures = False
            logger.info("Database successfully shutdown with no errors reported")

    ######
    def _sighup_cluster(self):
        """ assumes prepare() has been called """
        workers = min(len(self.gparray.get_hostlist()), self.parallel)

        class SighupWorkerPool(base.WorkerPool):
            """
            This pool knows all the commands are calls to pg_ctl.
            The failed list collects segments without a running postmaster.
            """

            def __init__(self, numWorkers):
                base.WorkerPool.__init__(self, numWorkers)
                self.failed = []

            def check_results(self):
                while not self.completed_queue.empty():
                    item = self.completed_queue.get(False)
                    results = item.get_results()
                    if results.wasSuccessful():
                        continue
                    self.failed.append(item.db)

        self.pool = SighupWorkerPool(numWorkers=workers)
        dbList = self.gparray.getDbList()
        hostname = socket.gethostname()
        logger.info("Signalling all postmaster processes to reload")
        for db in dbList:
            ctxt = base.REMOTE
            remote_host = db.getSegmentHostName()
            if db.getSegmentHostName() == hostname:
                ctxt = base.LOCAL
                remote_host = None
            cmd = pg.ReloadDbConf(name="reload segment number " + str(db.getSegmentDbId()),
                                  db=db,
                                  ctxt=ctxt,
                                  remoteHost=remote_host
                                  )
            self.pool.addCommand(cmd)

        if self.quiet:
            self.pool.join()
        else:
            base.join_and_indicate_progress(self.pool)

        self.pool.check_results()
        self.pool.empty_completed_items()

        if len(self.pool.failed) < 1:
            return 0

        logger.info("--------------------------------------------")
        logger.info("Some segment postmasters were not reloaded")
        logger.info("--------------------------------------------")
        tabLog = TableLogger().setWarnWithArrows(True)
        tabLog.info(["Host", "Datadir", "Port", "Status"])
        for db in self.pool.failed:
            tup = [db.getSegmentHostName(), db.getSegmentDataDirectory(), str(db.getSegmentPort()),
                   db.getSegmentStatus()]
            tabLog.info(tup)
        tabLog.outputTable()
        logger.info("--------------------------------------------")
        return 1

    ######
    def _summarize_actions(self, segs):
        logger.info("--------------------------------------------")
        logger.info("Coordinator instance parameters")
        logger.info("--------------------------------------------")

        tabLog = TableLogger(logger=logger).setWarnWithArrows(True)
        tabLog.info(["Coordinator Cloudberry instance process active PID", "= %s" % self.pid])
        tabLog.info(["Database", "= %s" % self.dburl.pgdb])
        tabLog.info(["Coordinator port", "= %s" % self.port])
        tabLog.info(["Coordinator directory", "= %s" % self.coordinator_datadir])
        tabLog.info(["Shutdown mode", "= %s" % self.mode])
        tabLog.info(["Timeout", "= %s" % self.timeout])

        standbyMsg = "On" if self.gparray.standbyCoordinator and self.stopstandby else "Off"
        tabLog.info(["Shutdown Coordinator standby host", "= %s" % standbyMsg])

        tabLog.outputTable()

        logger.info("--------------------------------------------")
        logger.info("Segment instances that will be shutdown:")
        logger.info("--------------------------------------------")

        tabLog = TableLogger(logger=logger).setWarnWithArrows(True)
        tabLog.info(["Host", "Datadir", "Port", "Status"])

        for db in segs:
            tabLog.info([db.getSegmentHostName(), db.getSegmentDataDirectory(),
                         str(db.getSegmentPort()), db.getSegmentStatus()])
        tabLog.outputTable()

    # ----------------------- Command line option parser ----------------------
    @staticmethod
    def createParser():
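        """
        Build the gpstop option parser. Illustrative invocations (the -a flag
        comes from the standard logging/help options added below):

            gpstop -a                # stop the cluster without prompting
            gpstop -M fast           # fast shutdown, roll back transactions
            gpstop -u                # reload postgresql.conf, do not stop
            gpstop --host sdw1       # stop only the segments on host sdw1
        """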
        parser = OptParser(option_class=OptChecker,
                           description="Stops a CBDB Array.",
                           version='%prog version $Revision$')
        parser.setHelp([])

        addStandardLoggingAndHelpOptions(parser, includeNonInteractiveOption=True, includeUsageOption=True)

        addTo = OptionGroup(parser, 'Connection options')
        parser.add_option_group(addTo)
        addCoordinatorDirectoryOptionForSingleClusterProgram(addTo)

        addTo = OptionGroup(parser, 'Instance shutdown options: ')
        parser.add_option_group(addTo)
        addTo.add_option('-f', '--fast', action='store_true', default=False,
                         help="<deprecated> Fast shutdown, active transactions interrupted and rolled back")
        addTo.add_option('-i', '--immediate', action='store_true', default=False,
                         help="<deprecated> Immediate shutdown, active transactions aborted.")
        addTo.add_option('-s', '--smart', action='store_true',
                         help="<deprecated> Smart shutdown, wait for active transactions to complete. [default]")
        addTo.add_option('-z', '--force', action='store_true', default=False,
                         help="<deprecated> Force shutdown of segment instances marked as invalid. Kill postmaster PID, " \
                              "delete /tmp lock files and remove segment instance postmaster.pid file.")
        addTo.add_option('-M', '--mode', type='choice', choices=['fast', 'immediate', 'smart'],
                         metavar='fast|immediate|smart', action='store', default='smart',
                         help='set the method of shutdown')

        addTo.add_option('-r', '--restart', action='store_true',
                         help='Restart Apache Cloudberry instance after successful gpstop.')
        addTo.add_option('-m', '--master_only', '-c', '--coordinator_only', dest='coordinator_only', action='store_true',
                         help='stop coordinator instance started in maintenance mode')
        addTo.add_option('-y', dest="stop_standby", action='store_false', default=True,
                         help='Do not stop the standby coordinator process.')
        addTo.add_option('-u', dest="request_sighup", action='store_true',
                         help="upload new coordinator postgresql.conf settings, does not stop Cloudberry array," \
                              "issues a signal to the coordinator segment postmaster process to reload")

        addTo.add_option('-B', '--parallel', type="int", default=DEFAULT_NUM_WORKERS, metavar="<parallel_processes>",
                         help='number of segment hosts to run in parallel. Default is %d' % DEFAULT_NUM_WORKERS)
        addTo.add_option('-t', '--timeout', dest='timeout', default=SEGMENT_STOP_TIMEOUT_DEFAULT, type='int',
                         help='time to wait for segment stop (in seconds)')
        addTo.add_option('--host', dest='only_this_host', type='string',
                         help='stop all segments on this host (this will only complete if failover segments are available); '
                              'hostname as displayed in gp_segment_configuration')
        addTo.add_option('--skipvalidation', action='store_true', default=False,
                         help="<development testing only> DO NOT USE")
        addTo.add_option('-F', dest='fts_hosts', type='string', default=None,
                         help='specify the file that contains all FTS hosts. If this argument is set, `gpstop` will attempt '
                              'to stop all FTS processes on the specified hosts; if not, `gpstop` will stop all FTS processes '
                              'specified in $COORDINATOR_DATA_DIRECTORY/config/fts_host')
        addTo.add_option('-E', dest='etcd_hosts', type='string', default=None,
                         help='specify the file that contains all etcd hosts. If this argument is set, `gpstop` will attempt '
                              'to stop all etcd processes on the specified hosts')
        parser.set_defaults(verbose=False, filters=[], slice=(None, None))
        return parser

    @staticmethod
    def createProgram(options, args):
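        """
        Validate the parsed command-line options, reconcile the deprecated mode
        flags with -M, and construct the GpStop instance to run.
        """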
        logfileDirectory = options.ensure_value("logfileDirectory", False)
        external_fts = is_external_fts()

        if not external_fts and (options.fts_hosts or options.etcd_hosts):
            raise ProgramArgumentValidationException("-F and -E are not supported when external FTS is not enabled")
        if options.mode != 'smart':
            if options.fast or options.immediate:
                raise ProgramArgumentValidationException("Can not mix --mode options with older deprecated '-f,-i,-s'")

        if options.coordinator_only and options.only_this_host:
            raise ProgramArgumentValidationException("Incompatible flags. Cannot mix '--host' option with stopping "
                                                     "coordinator-only.")

        if options.restart and options.only_this_host:
            raise ProgramArgumentValidationException("Incompatible flags. Cannot mix '--host' option with '-r' for "
                                                     "restart.")

        if options.request_sighup and options.only_this_host:
            raise ProgramArgumentValidationException("Incompatible flags. Cannot mix '--host' option with '-u' for "
                                                     "config reload.")

        if (not options.stop_standby) and options.only_this_host:
            raise ProgramArgumentValidationException("Incompatible flags. Cannot mix '--host' option with '-y' for "
                                                     "skipping standby.")

        if options.fast:
            options.mode = "fast"
        if options.immediate:
            options.mode = "immediate"
        if options.smart:
            options.mode = "smart"

        # deprecating force option.  it no longer kills -9 things.
        # just make it stop fast instead.
        if options.force:
            options.mode = "fast"

        proccount = os.environ.get('GP_MGMT_PROCESS_COUNT')
        if options.parallel == DEFAULT_NUM_WORKERS and proccount is not None:
            options.parallel = int(proccount)

        # -B sanity check
        if options.parallel > 128 or options.parallel < 1:
            raise ProgramArgumentValidationException("Invalid value for parallel degree: %s" % options.parallel)

        # Don't allow them to go below default
        if options.timeout < SEGMENT_STOP_TIMEOUT_DEFAULT and not options.skipvalidation:
            raise ProgramArgumentValidationException(
                "Invalid timeout value.  Must be at least %s seconds." % SEGMENT_STOP_TIMEOUT_DEFAULT)
        
        if external_fts:
            if options.fts_hosts is None:
                coordinator_data_directory = os.getenv('COORDINATOR_DATA_DIRECTORY')
                if coordinator_data_directory is None:
                    coordinator_data_directory = options.coordinatorDataDirectory

                options.fts_hosts = coordinator_data_directory + '/config' + '/fts_host'

        if args:
            raise ProgramArgumentValidationException(
                "Argument %s is invalid.  Is an option missing a parameter?" % args[-1])

        return GpStop(options.mode,
                      coordinator_datadir=options.coordinatorDataDirectory,
                      parallel=options.parallel,
                      quiet=options.quiet,
                      coordinatoronly=options.coordinator_only,
                      sighup=options.request_sighup,
                      interactive=options.interactive,
                      stopstandby=options.stop_standby,
                      restart=options.restart,
                      timeout=options.timeout,
                      logfileDirectory=logfileDirectory,
                      onlyThisHost=options.only_this_host,
                      fts_hosts=options.fts_hosts,
                      etcd_hosts=options.etcd_hosts,
                      is_external_fts=external_fts)

if __name__ == '__main__':
    simple_main(GpStop.createParser, GpStop.createProgram)
