qiita / Git / [973924] /qiita_db/processing

Models:
AlyssaS/
qiita
Downloads: 1
[973924]: / qiita_db / processing_job.py
History
Download this file
2542 lines (2233 with data), 107.6 kB

# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

import networkx as nx
import qiita_db as qdb
import pandas as pd
from numpy import log as nlog # noqa

from collections import defaultdict, Iterable
from datetime import datetime, timedelta
from itertools import chain
from json import dumps, loads
from multiprocessing import Process, Queue, Event
from re import search, findall
from subprocess import Popen, PIPE
from time import sleep
from uuid import UUID
from os.path import join
from humanize import naturalsize
from os import environ

from qiita_core.qiita_settings import qiita_config
from qiita_db.util import create_nested_path


class Watcher(Process):
    # TODO: Qiita will need a proper mapping of these states to Qiita states
    # Currently, these strings are being inserted directly into Qiita's status
    # table. Qiita will be unfamiliar with many of these. We will need at least
    # one additional job type for 'Held': A job waiting for another to complete
    # before it can run.
    #
    # Note that the main Qiita script instantiates an object of this class in
    # a separate thread, so it can periodically update the database w/metadata
    # from Watcher's queue. Qiita's script also calls qdb.complete() so there
    # are no circular references. TODO: replace w/a REST call.

    # valid Qiita states:
    #             The current status of the job, one of {'queued', 'running',
    #             'success', 'error', 'in_construction', 'waiting'}

    # TODO: what to map in_construction to?
    job_state_map = {'C': 'completed', 'E': 'exiting', 'H': 'held',
                     'Q': 'queued', 'R': 'running', 'T': 'moving',
                     'W': 'waiting', 'S': 'suspended'}

    # TODO: moving, waiting, and suspended have been mapped to
    # 'running' in Qiita, as 'waiting' in Qiita connotes that the
    # main job itself has completed, and is waiting on validator
    # jobs to finish, etc. Revisit
    job_scheduler_to_qiita_state_map = {'completed': 'completed',
                                        'held': 'queued',
                                        'queued': 'queued',
                                        'exiting': 'running',
                                        'running': 'running',
                                        'moving': 'running',
                                        'waiting': 'running',
                                        'suspended': 'running',
                                        'DROPPED': 'error'}

    def __init__(self):
        super(Watcher, self).__init__()

        # set self.owner to qiita, or whomever owns processes we need to watch.
        self.owner = qiita_config.job_scheduler_owner

        # Setting a polling value less than 60 seconds allows for multiple
        # chances to catch the exit status before it disappears.
        self.polling_value = qiita_config.job_scheduler_poll_val

        # the cross-process method by which to communicate across
        # process boundaries. Note that when Watcher object runs,
        # another process will get created, and receive a copy of
        # the Watcher object. At this point, these self.* variables
        # become local to each process. Hence, the main process
        # can't see self.processes for example; theirs will just
        # be empty.
        self.queue = Queue()
        self.processes = {}

        # the cross-process sentinel value to shutdown Watcher
        self.event = Event()

    def _element_extract(self, snippet, list_of_elements,
                         list_of_optional_elements):
        results = {}
        missing_elements = []

        for element in list_of_elements:
            value = search('<%s>(.*?)</%s>' % (element, element), snippet)
            if value:
                results[element] = value.group(1)
            else:
                missing_elements.append(element)

        if missing_elements:
            raise AssertionError("The following elements were not found: %s"
                                 % ', '.join(missing_elements))

        for element in list_of_optional_elements:
            value = search('<%s>(.*?)</%s>' % (element, element), snippet)
            if value:
                results[element] = value.group(1)

        return results

    def _process_dependent_jobs(self, results):
        # when a job has its status changed, check to see if the job completed
        # with an error. If so, check to see if it had any jobs that were being
        # 'held' on this job's successful completion. If we are maintaining
        # state on any of these jobs, mark them as 'DROPPED', because they will
        # no longer appear in qstat output.
        if results['job_state'] == 'completed':
            if results['exit_status'] == '0':
                return

            if 'depend' in results:
                tmp = results['depend'].split(':')
                if tmp[0] == 'beforeok':
                    tmp.pop(0)
                    for child_job_id in tmp:
                        # jobs in 'beforeok' are labeled with the complete
                        # job id and what looks to be the server name doing
                        # the work. For now, simply remove the
                        # '@host.domain.org' (server) component.
                        child_job_id = child_job_id.split('@')[0]
                        self.processes[child_job_id]['job_state'] = 'DROPPED'
                        self.queue.put(self.processes[child_job_id])

    def run(self):
        # check to see if qstat is available. If not, exit immediately.
        proc = Popen("qstat -x", shell=True, stdout=PIPE, stderr=PIPE)
        proc.wait()
        if proc.returncode != 0:
            # inform any process expecting data from Watcher
            self.queue.put('QUIT')
            self.event.set()

        while not self.event.is_set():
            proc = Popen("qstat -x", shell=True, stdout=PIPE, stderr=PIPE)
            stdout, stderr = proc.communicate()
            if proc.returncode == 0:
                # qstat returned successfully with metadata on processes
                # break up metadata into individual <Job></Job> elements
                # for processing.
                m = findall('<Job>(.*?)</Job>', stdout.decode('ascii'))
                for item in m:
                    # filter out jobs that don't belong to owner
                    if search('<Job_Owner>%s</Job_Owner>' % self.owner, item):
                        # extract the metadata we want.
                        # if a job has completed, an exit_status element will
                        # be present. We also want that.
                        results = self._element_extract(item, ['Job_Id',
                                                               'Job_Name',
                                                               'job_state'],
                                                              ['depend'])
                        tmp = Watcher.job_state_map[results['job_state']]
                        results['job_state'] = tmp
                        if results['job_state'] == 'completed':
                            results2 = self._element_extract(item,
                                                             ['exit_status'],
                                                             [])
                            results['exit_status'] = results2['exit_status']

                        # determine if anything has changed since last poll
                        if results['Job_Id'] in self.processes:
                            if self.processes[results['Job_Id']] != results:
                                # metadata for existing job has changed
                                self.processes[results['Job_Id']] = results
                                self.queue.put(results)
                                self._process_dependent_jobs(results)
                        else:
                            # metadata for new job inserted
                            self.processes[results['Job_Id']] = results
                            self.queue.put(results)
            else:
                self.queue.put('QUIT')
                self.event.set()
                # don't join(), since we are exiting from the main loop

            sleep(self.polling_value)

    def stop(self):
        # 'poison pill' to thread/process
        self.queue.put('QUIT')
        # setting self.event is a safe way of communicating a boolean
        # value across processes and threads.
        # when this event is 'set' by the main line of execution in Qiita,
        # (or in any other process if need be), Watcher's run loop will
        # stop and the Watcher process will exit.
        self.event.set()
        # Here, it is assumed that we are running this from the main
        # context. By joining(), we're waiting for the Watcher process to
        # end before returning from this method.
        self.join()


def launch_local(env_script, start_script, url, job_id, job_dir):

    # launch_local() differs from launch_job_scheduler(), as no Watcher() is
    # used.
    # each launch_local() process will execute the cmd as a child process,
    # wait, and update the database once cmd has completed.
    #
    # As processes are lighter weight than jobs, this should be fine.
    # This is how the current job model works locally.
    cmd = [start_script, url, job_id, job_dir]
    print("ENV_SCRIPT: %s" % env_script)
    print("START_SCRIPT: %s" % start_script)
    print("URL: %s" % url)
    print("JOB ID: %s" % job_id)
    print("JOB DIR: %s" % job_dir)

    # When Popen() executes, the shell is not in interactive mode,
    # so it is not sourcing any of the bash configuration files
    # We need to source it so the env_script are available
    cmd = "bash -c '%s; %s'" % (env_script, ' '.join(cmd))
    print("CMD STRING: %s" % cmd)

    # Popen() may also need universal_newlines=True
    proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)

    # Communicate pulls all stdout/stderr from the PIPEs
    # This call waits until cmd is done
    stdout, stderr = proc.communicate()
    print("STDOUT: %s" % stdout)
    print("STDERR: %s" % stderr)

    # proc.returncode will be equal to None if the process hasn't finished
    # yet. If cmd was terminated by a SIGNAL, it will be a negative value.
    # (*nix platforms only)
    error = None
    print("RETURN CODE: %s" % proc.returncode)
    print("JOB ID: %s" % job_id)

    if proc.returncode != 0:
        error = "error from launch_local when launching cmd='%s'" % cmd
        error = "%s\n%s\n%s" % (error, stdout, stderr)

        # Forcing the creation of a new connection
        qdb.sql_connection.create_new_transaction()
        ProcessingJob(job_id).complete(False, error=error)


def launch_job_scheduler(env_script, start_script, url, job_id, job_dir,
                         dependent_job_id, resource_params):

    # note that job_id is Qiita's UUID, not a job_scheduler job ID
    cmd = [start_script, url, job_id, job_dir]

    lines = [
        '#!/bin/bash',
        f'#SBATCH --error {job_dir}/slurm-error.txt',
        f'#SBATCH --output {job_dir}/slurm-output.txt']
    lines.append("echo $SLURM_JOBID")
    lines.append("source ~/.bash_profile")
    lines.append(env_script)

    epilogue = environ.get('QIITA_JOB_SCHEDULER_EPILOGUE', '')
    if epilogue:
        lines.append(f"#SBATCH --epilog {epilogue}")

    lines.append(' '.join(cmd))

    # writing the script file
    create_nested_path(job_dir)

    fp = join(job_dir, '%s.txt' % job_id)

    with open(fp, 'w') as job_file:
        job_file.write("\n".join(lines))

    sbatch_cmd = ['sbatch']

    if dependent_job_id:
        # note that a dependent job should be submitted before the
        # 'parent' job ends
        sbatch_cmd.append("-d")
        sbatch_cmd.append("afterok:%s" % dependent_job_id)

    sbatch_cmd.append(resource_params)
    sbatch_cmd.append(fp)

    stdout, stderr, return_value = _system_call(' '.join(sbatch_cmd))

    if return_value != 0:
        raise AssertionError(f'Error submitting job: {sbatch_cmd} :: {stderr}')

    job_id = stdout.strip('\n').split(" ")[-1]

    return job_id


def _system_call(cmd):
    """Execute the command `cmd`

    Parameters
    ----------
    cmd : str
        The string containing the command to be run.

    Returns
    -------
    tuple of (str, str, int)
        The standard output, standard error and exist status of the
        executed command

    Notes
    -----
    This function is ported from QIIME (http://www.qiime.org), previously named
    qiime_system_call. QIIME is a GPL project, but we obtained permission from
    the authors of this function to port it to Qiita and keep it under BSD
    license.
    """
    proc = Popen(cmd, universal_newlines=True, shell=True, stdout=PIPE,
                 stderr=PIPE)
    # Communicate pulls all stdout/stderr from the PIPEs
    # This call blocks until the command is done
    stdout, stderr = proc.communicate()
    return_value = proc.returncode
    return stdout, stderr, return_value


class ProcessingJob(qdb.base.QiitaObject):
    r"""Models a job that executes a command in a set of artifacts

    Attributes
    ----------
    user
    command
    parameters
    status
    log
    heartbeat
    step

    Methods
    -------
    exists
    create
    """
    _table = 'processing_job'
    _launch_map = {'qiita-plugin-launcher':
                   {'function': launch_local,
                    'execute_in_process': False},
                   'qiita-plugin-launcher-slurm':
                   {'function': launch_job_scheduler,
                    'execute_in_process': True}}

    @classmethod
    def exists(cls, job_id):
        """Check if the job `job_id` exists

        Parameters
        ----------
        job_id : str
            The job id

        Returns
        -------
        bool
            True if the job `job_id` exists. False otherwise.
        """
        try:
            UUID(job_id)
        except ValueError:
            return False

        with qdb.sql_connection.TRN:
            sql = """SELECT EXISTS(SELECT *
                                   FROM qiita.processing_job
                                   WHERE processing_job_id = %s)"""
            qdb.sql_connection.TRN.add(sql, [job_id])
            return qdb.sql_connection.TRN.execute_fetchlast()

    @classmethod
    def by_external_id(cls, external_id):
        """Return Qiita Job UUID associated with external_id

        Parameters
        ----------
        external_id : str
            An external id (e.g. job scheduler Job ID)

        Returns
        -------
        str
            Qiita Job UUID, if found, otherwise None
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT processing_job_id FROM qiita.processing_job
                     WHERE external_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [external_id])
            return cls(qdb.sql_connection.TRN.execute_fetchlast())

    @property
    def resource_allocation_info(self):
        """Return resource allocation defined for this job. For
        external computational resources only.

        Returns
        -------
        str
            A resource allocation string useful to the external resource
        """
        with qdb.sql_connection.TRN:
            analysis = None
            if self.command.name == 'complete_job':
                jtype = 'COMPLETE_JOBS_RESOURCE_PARAM'
                params = self.parameters.values
                v = loads(params['payload'])
                # assume an empty string for name is preferable to None
                name = ''
                if v['artifacts'] is not None:
                    an_element = list(v['artifacts'].keys())[0]
                    name = v['artifacts'][an_element]['artifact_type']
                # for analysis we have two options, either use the
                # input_artifacts or use the parameter 'analysis' of the job
                # to complete
                job = ProcessingJob(params['job_id'])
                params = job.parameters.values
                ia = job.input_artifacts
                if 'analysis' in params and params['analysis'] is not None:
                    analysis = qdb.analysis.Analysis(params['analysis'])
                elif ia:
                    analysis = ia[0].analysis
            elif self.command.name == 'release_validators':
                jtype = 'RELEASE_VALIDATORS_RESOURCE_PARAM'
                tmp = ProcessingJob(self.parameters.values['job'])
                name = tmp.parameters.command.name
                if tmp.input_artifacts:
                    analysis = tmp.input_artifacts[0].analysis
            elif self.command.name == 'Validate':
                jtype = 'VALIDATOR'
                vals = self.parameters.values
                name = vals['artifact_type']
                if vals['analysis'] is not None:
                    analysis = qdb.analysis.Analysis(vals['analysis'])
            elif self.id == 'register':
                jtype = 'REGISTER'
                name = 'REGISTER'
            else:
                # assume anything else is a command
                jtype = 'RESOURCE_PARAMS_COMMAND'
                name = self.command.name
                # for analysis we have two options, either use the
                # input_artifacts or use the parameter 'analysis' of self
                params = self.parameters.values
                ia = self.input_artifacts
                if 'analysis' in params and params['analysis'] is not None:
                    analysis = qdb.analysis.Analysis(params['analysis'])
                elif ia:
                    analysis = ia[0].analysis

            # first, query for resources matching name and type
            sql = """SELECT allocation FROM
                     qiita.processing_job_resource_allocation
                     WHERE name = %s and job_type = %s"""
            qdb.sql_connection.TRN.add(sql, [name, jtype])

            result = qdb.sql_connection.TRN.execute_fetchflatten()

            # if no matches for both type and name were found, query the
            # 'default' value for the type

            if not result:
                sql = """SELECT allocation FROM
                         qiita.processing_job_resource_allocation WHERE
                         name = %s and job_type = %s"""
                qdb.sql_connection.TRN.add(sql, ['default', jtype])

                result = qdb.sql_connection.TRN.execute_fetchflatten()
                if not result:
                    AssertionError(
                        "Could not match %s to a resource allocation!" % name)

            allocation = result[0]
            # adding user_level extra parameters
            allocation = f'{allocation} {self.user.slurm_parameters}'.strip()
            # adding analysis reservation
            if analysis is not None:
                sr = analysis.slurm_reservation
                if sr is not None:
                    allocation = f'{allocation} --reservation {sr}'

            if ('{samples}' in allocation or '{columns}' in allocation or
                    '{input_size}' in allocation):
                samples, columns, input_size = self.shape
                parts = []
                error_msg = ('Obvious incorrect allocation. Please '
                             'contact %s' % qiita_config.help_email)
                for part in allocation.split('--'):
                    param = ''
                    if part.startswith('time '):
                        param = 'time '
                    elif part.startswith('mem '):
                        param = 'mem '
                    else:
                        # if parts is empty, this is the first part so no --
                        if parts:
                            parts.append(f'--{part.strip()}')
                        else:
                            parts.append(part.strip())
                        continue

                    part = part[len(param):]
                    if ('{samples}' in part or '{columns}' in part or
                            '{input_size}' in part):
                        # to make sure that the formula is correct and avoid
                        # possible issues with conversions, we will check that
                        # all the variables {samples}/{columns}/{input_size}
                        # present in the formula are not None, if any is None
                        # we will set the job's error (will stop it) and the
                        # message is gonna be shown to the user within the job
                        if (('{samples}' in part and samples is None) or
                                ('{columns}' in part and columns is None) or
                                ('{input_size}' in part and input_size is
                                 None)):
                            self._set_error(error_msg)
                            return 'Not valid'

                        try:
                            # if eval has something that can't be processed
                            # it will raise a NameError
                            value = eval(part.format(
                                samples=samples, columns=columns,
                                input_size=input_size))
                        except NameError:
                            self._set_error(error_msg)
                            return 'Not valid'
                        else:
                            if value <= 0:
                                self._set_error(error_msg)
                                return 'Not valid'

                            if param == 'time ':
                                td = timedelta(seconds=value)
                                if td.days > 0:
                                    days = td.days
                                    td = td - timedelta(days=days)
                                    part = f'{days}-{str(td)}'
                                else:
                                    part = str(td)
                                part = part.split('.')[0]
                            else:
                                part = naturalsize(
                                    value, gnu=True, format='%.0f')
                    parts.append(f'--{param}{part}'.strip())

                allocation = ' '.join(parts)

            return allocation

    @classmethod
    def create(cls, user, parameters, force=False):
        """Creates a new job in the system

        Parameters
        ----------
        user : qiita_db.user.User
            The user executing the job
        parameters : qiita_db.software.Parameters
            The parameters of the job being executed
        force : bool
            Force creation on duplicated parameters

        Returns
        -------
        qiita_db.processing_job.ProcessingJob
            The newly created job

        Notes
        -----
        If force is True the job is going to be created even if another job
        exists with the same parameters
        """
        TTRN = qdb.sql_connection.TRN
        with TTRN:
            command = parameters.command
            if not force:
                # check if a job with the same parameters already exists
                sql = """SELECT processing_job_id, email,
                        processing_job_status, COUNT(aopj.artifact_id)
                     FROM qiita.processing_job
                     LEFT JOIN qiita.processing_job_status
                        USING (processing_job_status_id)
                     LEFT JOIN qiita.artifact_output_processing_job aopj
                        USING (processing_job_id)
                     WHERE command_id = %s AND processing_job_status IN (
                        'success', 'waiting', 'running', 'in_construction') {0}
                     GROUP BY processing_job_id, email,
                        processing_job_status"""

                # we need to use ILIKE because of booleans as they can be
                # false or False
                params = []
                for k, v in parameters.values.items():
                    # this is necessary in case we have an Iterable as a value
                    # but that is string
                    if isinstance(v, Iterable) and not isinstance(v, str):
                        for vv in v:
                            params.extend([k, str(vv)])
                    else:
                        params.extend([k, str(v)])

                if params:
                    # divided by 2 as we have key-value pairs
                    len_params = int(len(params)/2)
                    sql = sql.format(' AND ' + ' AND '.join(
                        ["command_parameters->>%s ILIKE %s"] * len_params))
                    params = [command.id] + params
                    TTRN.add(sql, params)
                else:
                    # the sql variable expects the list of parameters but if
                    # there is no param we need to replace the {0} with an
                    # empty string
                    TTRN.add(sql.format(""), [command.id])

                # checking that if the job status is success, it has children
                # [2] status, [3] children count
                existing_jobs = [r for r in TTRN.execute_fetchindex()
                                 if r[2] != 'success' or r[3] > 0]
                if existing_jobs:
                    raise ValueError(
                        'Cannot create job because the parameters are the '
                        'same as jobs that are queued, running or already '
                        'have succeeded:\n%s' % '\n'.join(
                            ["%s: %s" % (jid, status)
                             for jid, _, status, _ in existing_jobs]))

            sql = """INSERT INTO qiita.processing_job
                        (email, command_id, command_parameters,
                         processing_job_status_id)
                     VALUES (%s, %s, %s, %s)
                     RETURNING processing_job_id"""
            status = qdb.util.convert_to_id(
                "in_construction", "processing_job_status")
            sql_args = [user.id, command.id,
                        parameters.dump(), status]
            TTRN.add(sql, sql_args)
            job_id = TTRN.execute_fetchlast()

            # Link the job with the input artifacts
            sql = """INSERT INTO qiita.artifact_processing_job
                        (artifact_id, processing_job_id)
                     VALUES (%s, %s)"""
            pending = defaultdict(dict)
            for pname, vals in command.parameters.items():
                if vals[0] == 'artifact':
                    artifact_info = parameters.values[pname]
                    # If the artifact_info is a list, then the artifact
                    # still doesn't exist because the current job is part
                    # of a workflow, so we can't link
                    if not isinstance(artifact_info, list):
                        TTRN.add(sql, [artifact_info, job_id])
                    else:
                        pending[artifact_info[0]][pname] = artifact_info[1]
                elif pname == 'artifact':
                    TTRN.add(sql, [parameters.values[pname], job_id])

            if pending:
                sql = """UPDATE qiita.processing_job
                         SET pending = %s
                         WHERE processing_job_id = %s"""
                TTRN.add(sql, [dumps(pending), job_id])

            TTRN.execute()

            return cls(job_id)

    @property
    def user(self):
        """The user that launched the job

        Returns
        -------
        qiita_db.user.User
            The user that launched the job
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT email
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            email = qdb.sql_connection.TRN.execute_fetchlast()
            return qdb.user.User(email)

    @property
    def command(self):
        """The command that the job executes

        Returns
        -------
        qiita_db.software.Command
            The command that the job executes
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT command_id
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            cmd_id = qdb.sql_connection.TRN.execute_fetchlast()
            return qdb.software.Command(cmd_id)

    @property
    def parameters(self):
        """The parameters used in the job's command

        Returns
        -------
        qiita_db.software.Parameters
            The parameters used in the job's command
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT command_id, command_parameters
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            res = qdb.sql_connection.TRN.execute_fetchindex()[0]
            return qdb.software.Parameters.load(
                qdb.software.Command(res[0]), values_dict=res[1])

    @property
    def input_artifacts(self):
        """The artifacts used as input in the job

        Returns
        -------
        list of qiita_db.artifact.Artifact
            The artifacs used as input in the job
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT artifact_id
                     FROM qiita.artifact_processing_job
                     WHERE processing_job_id = %s
                     ORDER BY artifact_id"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            return [qdb.artifact.Artifact(aid)
                    for aid in qdb.sql_connection.TRN.execute_fetchflatten()]

    @property
    def status(self):
        """The status of the job

        Returns
        -------
        str
            The current status of the job, one of {'queued', 'running',
            'success', 'error', 'in_construction', 'waiting'}

        """
        with qdb.sql_connection.TRN:
            sql = """SELECT processing_job_status
                     FROM qiita.processing_job_status
                        JOIN qiita.processing_job
                            USING (processing_job_status_id)
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            return qdb.sql_connection.TRN.execute_fetchlast()

    def _generate_notification_message(self, value, error_msg):
        ignored_software = ('artifact definition',)
        ignored_commands = ('Validate', 'complete_job', 'release_validators')

        # abort early conditions (don't send an email notification)
        # tentatively accept the overhead of a function-call, even when a
        # notification isn't sent, just to keep the logic clean and
        # centralized.

        if value == 'waiting':
            # notification not needed.
            return None

        if not self.user.info['receive_processing_job_emails']:
            # notification not needed.
            return None

        if self.command.software.name in ignored_software:
            # notification not needed.
            return None

        if self.command.name in ignored_commands:
            # notification not needed.
            return None

        # generate subject line
        subject = (f'{self.command.name}: {value}, {self.id} '
                   f'[{self.external_id}]')

        # generate message line
        message = ''

        input_artifacts = self.input_artifacts
        if input_artifacts is None:
            # this is an admin job. display command name and parameters
            message = (f'Admin Job {self.command.name} '
                       f'{self.command.parameters}')
        else:
            for artifact in input_artifacts:
                if artifact.prep_templates:
                    # this is a processing job. display the study id as link,
                    # prep ids, data_type, and command name.
                    study_ids = [x.study_id for x in artifact.prep_templates]
                    prep_ids = [x.id for x in artifact.prep_templates]
                    data_types = [x.data_type() for x in
                                  artifact.prep_templates]

                    # there should only be one study id
                    study_ids = set(study_ids)
                    if len(study_ids) > 1:
                        raise qdb.exceptions.QiitaError("More than one Study "
                                                        "ID was found: "
                                                        f"{study_ids}")
                    study_id = study_ids.pop()

                    # there should be at least one prep_id and probably more.
                    prep_ids = list(set(prep_ids))
                    if len(prep_ids) == 0:
                        raise qdb.exceptions.QiitaError("No Prep IDs were "
                                                        "found")
                    if len(prep_ids) == 1:
                        study_url = (f'{qiita_config.base_url}/study/'
                                     f'description/{study_id}?prep_id='
                                     f'{prep_ids[0]}')
                    else:
                        study_url = (f'{qiita_config.base_url}/study/'
                                     f'description/{study_id}')
                    # convert into a string for presentation.
                    prep_ids = [str(x) for x in prep_ids]
                    prep_ids = ', '.join(prep_ids)

                    # there should be only one data type.
                    data_types = set(data_types)
                    if len(data_types) > 1:
                        raise qdb.exceptions.QiitaError("More than one data "
                                                        "type was found: "
                                                        f"{data_types}")
                    data_type = data_types.pop()

                    message = f'{self.command.name}\n'
                    message += f'Prep IDs: {prep_ids}\n'
                    message += f'{study_url}\n'
                    message += f'Data Type: {data_type}\n'
                elif artifact.analysis:
                    # this is an analysis job. display analysis id as link and
                    # the command name.
                    message = f'Analysis Job {self.command.name}\n'
                    message += f'{qiita_config.base_url}/analysis/'
                    message += f'description/{artifact.analysis.id}/\n'
                else:
                    raise qdb.exceptions.QiitaError("Unknown Condition")

        # append legacy message line
        message += 'New status: %s' % (value)

        if value == 'error' and error_msg is not None:
            message += f'\n\nError:\n{error_msg}'

        return {'subject': subject, 'message': message}

    def _set_status(self, value, error_msg=None):
        """Sets the status of the job

        Parameters
        ----------
        value : str, {'queued', 'running', 'success', 'error',
                      'in_construction', 'waiting'}
            The new status of the job
        error_msg : str, optional
            If not None this is the message that is going to be sent to the
            user when the value is 'error'

        Raises
        ------
        qiita_db.exceptions.QiitaDBStatusError
            - If the current status of the job is 'success'
            - If the current status of the job is 'running' and `value` is
            'queued'
        """
        with qdb.sql_connection.TRN:
            current_status = self.status
            if current_status == 'success':
                raise qdb.exceptions.QiitaDBStatusError(
                    "Cannot change the status of a 'success' job")
            elif current_status == 'running' and value == 'queued':
                raise qdb.exceptions.QiitaDBStatusError(
                    "Cannot revert the status of a 'running' job to 'queued'")

            new_status = qdb.util.convert_to_id(
                value, "processing_job_status")

            msg = self._generate_notification_message(value, error_msg)
            if msg is not None:
                # send email
                qdb.util.send_email(self.user.email, msg['subject'],
                                    msg['message'])
                # send email to our sys-admin if error from admin
                if self.user.level in {'admin', 'wet-lab admin'}:
                    if value == 'error':
                        qdb.util.send_email(
                            qiita_config.sysadmin_email, msg['subject'],
                            msg['message'])

            sql = """UPDATE qiita.processing_job
                     SET processing_job_status_id = %s
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [new_status, self.id])
            qdb.sql_connection.TRN.execute()

    @property
    def external_id(self):
        """Retrieves the external id"""
        with qdb.sql_connection.TRN:
            sql = """SELECT external_job_id
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            result = qdb.sql_connection.TRN.execute_fetchlast()
            if result is None:
                result = 'Not Available'
            return result

    @external_id.setter
    def external_id(self, value):
        """Sets the external job id of the job

        Parameters
        ----------
        value : str, {'queued', 'running', 'success', 'error',
                      'in_construction', 'waiting'}
            The job's new status

        Raises
        ------
        qiita_db.exceptions.QiitaDBStatusError
            - If the current status of the job is 'success'
            - If the current status of the job is 'running' and `value` is
            'queued'
        """
        sql = """UPDATE qiita.processing_job
                 SET external_job_id = %s
                 WHERE processing_job_id = %s"""
        qdb.sql_connection.perform_as_transaction(sql, [value, self.id])

    @property
    def release_validator_job(self):
        """Retrieves the release validator job

        Returns
        -------
        qiita_db.processing_job.ProcessingJob or None
            The release validator job of this job
        """
        rvalidator = None
        with qdb.sql_connection.TRN:
            sql = """SELECT processing_job_id
                     FROM qiita.processing_job
                     WHERE command_id in (
                         SELECT command_id
                         FROM qiita.software_command
                         WHERE name = 'release_validators')
                             AND command_parameters->>'job' = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            results = qdb.sql_connection.TRN.execute_fetchflatten()
            if results:
                rvalidator = ProcessingJob(results[0])

        return rvalidator

    def submit(self, parent_job_id=None, dependent_jobs_list=None):
        """Submits the job to execution
        This method has the ability to submit itself, as well as a list of
        other ProcessingJob objects. If a list of ProcessingJob objects is
        supplied, they will be submitted conditionally on the successful
        execution of this object.

        Users of this method don't need to set parent_job_id. It is used
        internally by submit() for subsequent submit() calls for dependents.

        Raises
        ------
        QiitaDBOperationNotPermittedError
            If the job is not in 'waiting' or 'in_construction' status
        """
        with qdb.sql_connection.TRN:
            status = self.status
            if status not in {'in_construction', 'waiting'}:
                raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                    "Can't submit job, not in 'in_construction' or "
                    "'waiting' status. Current status: %s" % status)
            self._set_status('queued')
            # At this point we are going to involve other processes. We need
            # to commit the changes to the DB or the other processes will not
            # see these changes
            qdb.sql_connection.TRN.commit()

        job_dir = join(qdb.util.get_work_base_dir(), self.id)
        command = self.command
        software = command.software
        cname = command.name
        plugin_start_script = software.start_script
        plugin_env_script = software.environment_script

        # Appending the portal URL so the job requests the information from the
        # portal server that submitted the job
        url = "%s%s" % (qiita_config.base_url, qiita_config.portal_dir)

        # if the word ENVIRONMENT is in the plugin_env_script we have a special
        # case where we are going to execute some command and then wait for the
        # plugin to return their own id (first implemented for
        # fast-bowtie2+woltka)
        #
        # This is the hardcoded lines described in issue:
        # https://github.com/qiita-spots/qiita/issues/3340
        # the idea is that in the future we shouldn't check specific command
        # names to know if it should be executed differently and the
        # plugin should let Qiita know that a specific command should be ran
        # as job array or not
        cnames_to_skip = {'Calculate Cell Counts', 'Calculate RNA Copy Counts'}
        if 'ENVIRONMENT' in plugin_env_script and cname not in cnames_to_skip:
            # the job has to be in running state so the plugin can change its`
            # status
            with qdb.sql_connection.TRN:
                self._set_status('running')
                qdb.sql_connection.TRN.commit()

            create_nested_path(job_dir)
            cmd = (f'{plugin_env_script}; {plugin_start_script} '
                   f'{url} {self.id} {job_dir}')
            stdout, stderr, return_value = _system_call(cmd)
            if return_value != 0 or stderr != '':
                self._set_error(stderr)
            job_id = stdout
        # note that dependent jobs, such as m validator jobs marshalled into
        # n 'queues' require the job_id returned by an external scheduler such
        # as Torque's MOAB, rather than a job name that can be defined within
        # Qiita. Hence, this method must be able to handle the case where a job
        # requires metadata from a late-defined and time-sensitive source.
        elif qiita_config.plugin_launcher in ProcessingJob._launch_map:
            launcher = ProcessingJob._launch_map[qiita_config.plugin_launcher]
            if launcher['execute_in_process']:
                # run this launcher function within this process.
                # usually this is done if the launcher spawns other processes
                # before returning immediately, usually with a job ID that can
                # be used to monitor the job's progress.

                try:
                    resource_params = self.resource_allocation_info
                except qdb.exceptions.QiitaDBUnknownIDError as e:
                    # this propagates the error to the job and using str(e)
                    # should be fine as we just want the last calculation
                    # error
                    self._set_error(str(e))

                # note that parent_job_id is being passed transparently from
                # submit declaration to the launcher.
                # TODO: In proc launches should throw exceptions, that are
                # handled by this code. Out of proc launches will need to
                # handle exceptions by catching them and returning an error
                # code.
                job_id = launcher['function'](plugin_env_script,
                                              plugin_start_script,
                                              url,
                                              self.id,
                                              job_dir,
                                              parent_job_id, resource_params)

                if dependent_jobs_list:
                    # a dependent_jobs_list will always have at least one
                    # job
                    next_job = dependent_jobs_list.pop(0)

                    if not dependent_jobs_list:
                        # dependent_jobs_list is now empty
                        dependent_jobs_list = None

                    # The idea here is that a list of jobs is considered a
                    # chain. Each job in the chain is submitted with the job
                    # id of job submitted before it; a job will only run if
                    # 'parent_job' ran successfully. Each iteration of submit()
                    # launches a job, pulls the next job from the list, and
                    # submits it. The remainder of the list is also passed to
                    # continue the process.
                    next_job.submit(parent_job_id=job_id,
                                    dependent_jobs_list=dependent_jobs_list)

            elif not launcher['execute_in_process']:
                # run this launcher function as a new process.
                # usually this is done if the launcher performs work that takes
                # an especially long time, or waits for children who perform
                # such work.
                p = Process(target=launcher['function'],
                            args=(plugin_env_script,
                                  plugin_start_script,
                                  url,
                                  self.id,
                                  job_dir))

                p.start()

                job_id = p.pid

                if dependent_jobs_list:
                    # for now, treat dependents as independent when
                    # running locally. This means they will not be
                    # organized into n 'queues' or 'chains', and
                    # will all run simultaneously.
                    for dependent in dependent_jobs_list:
                        # register dependent job as queued to make qiita
                        # aware of this child process
                        dependent._set_status('queued')

                        dep_software = dependent.command.software
                        dep_job_dir = join(qdb.util.get_work_base_dir(),
                                           dependent.id)
                        p = Process(target=launcher['function'],
                                    args=(dep_software.environment_script,
                                          dep_software.start_script,
                                          url,
                                          dependent.id,
                                          dep_job_dir))
                        p.start()
                        # assign the child process ID as external id to
                        # the dependent
                        dependent.external_id = p.pid
            else:
                error = ("execute_in_process must be defined",
                         "as either true or false")
                raise AssertionError(error)
        else:
            error = "plugin_launcher should be one of two values for now"
            raise AssertionError(error)

        # note that at this point, self.id is Qiita's UUID for a Qiita
        # job. job_id at this point is an external ID (e.g. Torque Job
        # ID). Record the mapping between job_id and self.id using
        # external_id.
        if job_id is not None:
            self.external_id = job_id

    def release(self):
        """Releases the job from the waiting status and creates the artifact

        Returns
        -------
        dict of {int: int}
            The mapping between the job output and the artifact
        """
        with qdb.sql_connection.TRN:
            if self.command.software.type != 'artifact definition':
                raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                    "Only artifact definition jobs can be released")

            # Retrieve the artifact information from the DB
            sql = """SELECT artifact_info
                     FROM qiita.processing_job_validator
                     WHERE validator_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            a_info = qdb.sql_connection.TRN.execute_fetchlast()

            provenance = loads(self.parameters.values['provenance'])
            job = ProcessingJob(provenance['job'])
            if 'data_type' in a_info:
                # This job is resulting from a private job
                parents = None
                params = None
                name = None
                data_type = a_info['data_type']
                pvals = job.parameters.values
                if 'analysis' in pvals:
                    cmd_out_id = None
                    analysis = qdb.analysis.Analysis(
                        job.parameters.values['analysis'])
                else:
                    cmd_out_id = provenance['cmd_out_id']
                    analysis = None
                a_info = a_info['artifact_data']
            else:
                # This job is resulting from a plugin job
                parents = job.input_artifacts
                params = job.parameters
                cmd_out_id = provenance['cmd_out_id']
                name = provenance['name']
                analysis = None
                data_type = None

            # Create the artifact
            atype = a_info['artifact_type']
            filepaths = a_info['filepaths']
            a = qdb.artifact.Artifact.create(
                filepaths, atype, parents=parents,
                processing_parameters=params,
                analysis=analysis, data_type=data_type, name=name)

            self._set_status('success')

            mapping = {}
            if cmd_out_id is not None:
                mapping = {cmd_out_id: a.id}

            return mapping

    def release_validators(self):
        """Allows all the validator job spawned by this job to complete"""
        if self.command.software.type not in ('artifact transformation',
                                              'private'):
            raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                "Only artifact transformation and private jobs can "
                "release validators")

        # Check if all the validators are completed. Validator jobs can be
        # in two states when completed: 'waiting' in case of success
        # or 'error' otherwise

        validator_ids = ['%s [%s]' % (j.id, j.external_id)
                         for j in self.validator_jobs
                         if j.status not in ['waiting', 'error']]

        # Active polling - wait until all validator jobs are completed
        # TODO: As soon as we see one errored validator, we should kill
        # the other jobs and exit early. Don't wait for all of the jobs
        # to complete.
        while validator_ids:
            jids = ', '.join(validator_ids)
            self.step = ("Validating outputs (%d remaining) via "
                         "job(s) %s" % (len(validator_ids), jids))
            sleep(10)
            validator_ids = ['%s [%s]' % (j.id, j.external_id)
                             for j in self.validator_jobs
                             if j.status not in ['waiting', 'error']]

        # Check if any of the validators errored
        errored = [j for j in self.validator_jobs
                   if j.status == 'error']
        if errored:
            # At least one of the validators failed, Set the rest of the
            # validators and the current job as failed
            waiting = [j.id for j in self.validator_jobs
                       if j.status == 'waiting']

            common_error = "\n".join(
                ["Validator %s error message: %s" % (j.id, j.log.msg)
                 for j in errored])

            val_error = "%d sister validator jobs failed: %s" % (
                len(errored), common_error)
            for j in waiting:
                ProcessingJob(j)._set_error(val_error)

            self._set_error('%d validator jobs failed: %s'
                            % (len(errored), common_error))
        else:
            mapping = {}
            # Loop through all validator jobs and release them, allowing
            # to create the artifacts. Note that if any artifact creation
            # fails, the rollback operation will make sure that the
            # previously created artifacts are not in there
            for vjob in self.validator_jobs:
                mapping.update(vjob.release())

            if mapping:
                sql = """INSERT INTO
                            qiita.artifact_output_processing_job
                            (artifact_id, processing_job_id,
                            command_output_id)
                         VALUES (%s, %s, %s)"""
                sql_args = [[aid, self.id, outid]
                            for outid, aid in mapping.items()]
                with qdb.sql_connection.TRN:
                    qdb.sql_connection.TRN.add(sql, sql_args, many=True)
                self._update_and_launch_children(mapping)
            self._set_status('success')

    def _complete_artifact_definition(self, artifact_data):
        """"Performs the needed steps to complete an artifact definition job

        In order to complete an artifact definition job we need to create
        the artifact, and then start all the jobs that were waiting for this
        artifact to be created. Note that each artifact definition job creates
        one and only one artifact.

        Parameters
        ----------
        artifact_data : {'filepaths': list of (str, str), 'artifact_type': str}
            Dict with the artifact information. `filepaths` contains the list
            of filepaths and filepath types for the artifact and
            `artifact_type` the type of the artifact

        Notes
        -----
        The `provenance` in the job.parameters can contain a `direct_creation`
        flag to avoid having to wait for the complete job to create a new
        artifact, which is normally ran during regular processing. Skipping is
        fine because we are adding an artifact to an existing job outside of
        regular processing
        """
        with qdb.sql_connection.TRN:
            atype = artifact_data['artifact_type']
            filepaths = artifact_data['filepaths']
            # We need to differentiate if this artifact is the
            # result of a previous job or uploading
            job_params = self.parameters.values
            if job_params['provenance'] is not None:
                # The artifact is a result from a previous job
                provenance = loads(job_params['provenance'])
                if provenance.get('direct_creation', False):
                    original_job = ProcessingJob(provenance['job'])
                    artifact = qdb.artifact.Artifact.create(
                        filepaths, atype,
                        parents=original_job.input_artifacts,
                        processing_parameters=original_job.parameters,
                        analysis=job_params['analysis'],
                        name=job_params['name'])

                    sql = """
                        INSERT INTO qiita.artifact_output_processing_job
                            (artifact_id, processing_job_id,
                             command_output_id)
                         VALUES (%s, %s, %s)"""
                    qdb.sql_connection.TRN.add(
                        sql, [artifact.id, original_job.id,
                              provenance['cmd_out_id']])
                    qdb.sql_connection.TRN.execute()

                    self._set_status('success')
                else:
                    if provenance.get('data_type') is not None:
                        artifact_data = {'data_type': provenance['data_type'],
                                         'artifact_data': artifact_data}

                    sql = """UPDATE qiita.processing_job_validator
                             SET artifact_info = %s
                             WHERE validator_id = %s"""
                    qdb.sql_connection.TRN.add(
                        sql, [dumps(artifact_data), self.id])
                    qdb.sql_connection.TRN.execute()

                    # Can't create the artifact until all validators
                    # are completed
                    self._set_status('waiting')
            else:
                # The artifact is uploaded by the user or is the initial
                # artifact of an analysis
                if ('analysis' in job_params and
                        job_params['analysis'] is not None):
                    pt = None
                    an = qdb.analysis.Analysis(job_params['analysis'])
                    sql = """SELECT data_type
                             FROM qiita.analysis_processing_job
                             WHERE analysis_id = %s
                                AND processing_job_id = %s"""
                    qdb.sql_connection.TRN.add(sql, [an.id, self.id])
                    data_type = qdb.sql_connection.TRN.execute_fetchlast()
                elif job_params['template'] is not None:
                    pt = qdb.metadata_template.prep_template.PrepTemplate(
                        job_params['template'])
                    an = None
                    data_type = None
                else:
                    pt = None
                    an = None
                    data_type = 'Job Output Folder'

                artifact = qdb.artifact.Artifact.create(
                    filepaths, atype, prep_template=pt, analysis=an,
                    data_type=data_type, name=job_params['name'])
                self._set_status('success')

            # we need to update the children jobs to replace the input
            # for the newly created artifact via the validator
            for c in self.children:
                self._helper_update_children({atype: artifact.id})
                c.submit()

    def _complete_artifact_transformation(self, artifacts_data):
        """Performs the needed steps to complete an artifact transformation job

        In order to complete an artifact transformation job, we need to create
        a validate job for each artifact output and submit it.

        Parameters
        ----------
        artifacts_data : dict of dicts
            The generated artifact information keyed by output name.
            The format of each of the internal dictionaries must be
            {'filepaths': list of (str, str), 'artifact_type': str}
            where `filepaths` contains the list of filepaths and filepath types
            for the artifact and `artifact_type` the type of the artifact

        Raises
        ------
        QiitaDBError
            If there is more than one prep information attached to the new
            artifact
        """
        validator_jobs = []
        with qdb.sql_connection.TRN:
            cmd_id = self.command.id
            for out_name, a_data in artifacts_data.items():
                # Correct the format of the filepaths parameter so we can
                # create a validate job
                filepaths = defaultdict(list)
                for fp, fptype in a_data['filepaths']:
                    filepaths[fptype].append(fp)
                atype = a_data['artifact_type']

                # The validate job needs a prep information file. In theory,
                # a job can be generated from more that one prep information
                # file, so we check here if we have one or more templates. At
                # this moment, If we allow more than one template, there is a
                # fair amount of changes that need to be done on the plugins,
                # so we are going to restrict the number of templates to one.
                # Note that at this moment there is no way of generating an
                # artifact from 2 or more artifacts, so we can impose this
                # limitation now and relax it later.
                templates = set()
                for artifact in self.input_artifacts:
                    templates.update(pt.id for pt in artifact.prep_templates)
                template = None
                analysis = None
                if len(templates) > 1:
                    raise qdb.exceptions.QiitaDBError(
                        "Currently only single prep template "
                        "is allowed, found %d" % len(templates))
                elif len(templates) == 1:
                    template = templates.pop()
                elif self.input_artifacts:
                    # In this case we have 0 templates. What this means is that
                    # this artifact is being generated in the analysis pipeline
                    # All the artifacts included in the analysis pipeline
                    # belong to the same analysis, so we can just ask the
                    # first artifact for the analysis that it belongs to
                    analysis = self.input_artifacts[0].analysis.id

                # Once the validate job completes, it needs to know if it has
                # been generated from a command (and how) or if it has been
                # uploaded. In order to differentiate these cases, we populate
                # the provenance parameter with some information about the
                # current job and how this artifact has been generated. This
                # does not affect the plugins since they can ignore this
                # parameter
                sql = """SELECT command_output_id
                         FROM qiita.command_output
                         WHERE name = %s AND command_id = %s"""
                qdb.sql_connection.TRN.add(sql, [out_name, cmd_id])
                cmd_out_id = qdb.sql_connection.TRN.execute_fetchlast()
                naming_params = self.command.naming_order
                if naming_params:
                    params = self.parameters.values
                    art_name = "%s %s" % (
                        out_name, ' '.join([str(params[p]).split('/')[-1]
                                            for p in naming_params]))
                else:
                    art_name = out_name

                provenance = {'job': self.id,
                              'cmd_out_id': cmd_out_id,
                              'name': art_name}

                if self.command.software.type == 'private':
                    provenance['data_type'] = 'Job Output Folder'

                # Get the validator command for the current artifact type and
                # create a new job
                # see also release_validators()
                cmd = qdb.software.Command.get_validator(atype)
                values_dict = {
                    'files': dumps(filepaths), 'artifact_type': atype,
                    'template': template, 'provenance': dumps(provenance),
                    'analysis': None}
                if analysis is not None:
                    values_dict['analysis'] = analysis
                validate_params = qdb.software.Parameters.load(
                    cmd, values_dict=values_dict)

                validator_jobs.append(
                    ProcessingJob.create(self.user, validate_params, True))

            # Change the current step of the job
            self.step = "Validating outputs (%d remaining) via job(s) %s" % (
                len(validator_jobs), ', '.join(['%s [%s]' % (
                    j.id, j.external_id) for j in validator_jobs]))

            # Link all the validator jobs with the current job
            self._set_validator_jobs(validator_jobs)

            # Submit m validator jobs as n lists of jobs
            n = qiita_config.job_scheduler_dependency_q_cnt
            if n is None:
                n = 2

            # taken from:
            # https://www.geeksforgeeks.org/break-list-chunks-size-n-python/
            lists = [validator_jobs[i * n:(i + 1) * n]
                     for i in range((len(validator_jobs) + n - 1) // n)]

            for sub_list in lists:
                # each sub_list will always have at least a lead_job
                lead_job = sub_list.pop(0)
                if not sub_list:
                    # sub_list is now empty
                    sub_list = None
                lead_job.submit(dependent_jobs_list=sub_list)

            # Submit the job that will release all the validators
            plugin = qdb.software.Software.from_name_and_version(
                'Qiita', 'alpha')
            cmd = plugin.get_command('release_validators')
            params = qdb.software.Parameters.load(
                cmd, values_dict={'job': self.id})
            job = ProcessingJob.create(self.user, params)

        # Doing the submission outside of the transaction
        job.submit()

    def _set_validator_jobs(self, validator_jobs):
        """Sets the validator jobs for the current job

        Parameters
        ----------
        validator_jobs : list of ProcessingJob
            The validator_jobs for the current job
        """
        with qdb.sql_connection.TRN:
            sql = """INSERT INTO qiita.processing_job_validator
                        (processing_job_id, validator_id)
                     VALUES (%s, %s)"""
            sql_args = [[self.id, j.id] for j in validator_jobs]
            qdb.sql_connection.TRN.add(sql, sql_args, many=True)
            qdb.sql_connection.TRN.execute()

    def complete(self, success, artifacts_data=None, error=None):
        """Completes the job, either with a success or error status

        Parameters
        ----------
        success : bool
            Whether the job has completed successfully or not
        artifacts_data : dict of dicts, optional
            The generated artifact information keyed by output name.
            The format of each of the internal dictionaries must be
            {'filepaths': list of (str, str), 'artifact_type': str}
            where `filepaths` contains the list of filepaths and filepath types
            for the artifact and `artifact_type` the type of the artifact
        error : str, optional
            If the job was not successful, the error message

        Raises
        ------
        qiita_db.exceptions.QiitaDBOperationNotPermittedError
            If the job is not in running state
        """
        with qdb.sql_connection.TRN:
            if success:
                if self.status != 'running':
                    # If the job is not running, we only allow to complete it
                    # if it did not succeed
                    raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                        "Can't complete job: not in a running state")
                if artifacts_data:
                    if self.command.software.type == 'artifact definition':
                        # There is only one artifact created
                        _, a_data = artifacts_data.popitem()
                        self._complete_artifact_definition(a_data)
                    else:
                        self._complete_artifact_transformation(artifacts_data)
                else:
                    self._set_status('success')
            else:
                self._set_error(error)

    @property
    def log(self):
        """The log entry attached to the job if it failed

        Returns
        -------
        qiita_db.logger.LogEntry or None
            If the status of the job is `error`, returns the LogEntry attached
            to the job
        """
        with qdb.sql_connection.TRN:
            res = None
            if self.status == 'error':
                sql = """SELECT logging_id
                         FROM qiita.processing_job
                         WHERE processing_job_id = %s"""
                qdb.sql_connection.TRN.add(sql, [self.id])
                log_id = qdb.sql_connection.TRN.execute_fetchlast()
                res = qdb.logger.LogEntry(log_id)
        return res

    def _set_error(self, error):
        """Attaches a log entry to the job

        Parameters
        ----------
        error : str
            The error message

        Raises
        ------
        qiita_db.exceptions.QiitaDBOperationNotPermittedError
            If the status of the job is 'success'
        """
        with qdb.sql_connection.TRN:
            if self.status == 'success':
                raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                    "Can only set up the log for jobs whose status is 'error'")

            log = qdb.logger.LogEntry.create('Runtime', error)

            sql = """UPDATE qiita.processing_job
                     SET logging_id = %s
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [log.id, self.id])
            qdb.sql_connection.TRN.execute()

            # All the children should be marked as failure
            for c in self.children:
                c.complete(False, error="Parent job '%s' failed." % self.id)

            # set as error after everything is in place
            self._set_status('error', error_msg=error)

    @property
    def heartbeat(self):
        """The timestamp of the last heartbeat received from the job

        Returns
        -------
        datetime
            The last heartbeat timestamp
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT heartbeat
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            return qdb.sql_connection.TRN.execute_fetchlast()

    def update_heartbeat_state(self):
        """Updates the heartbeat of the job

        In case that the job is in `queued` status, it changes the status to
        `running`.

        Raises
        ------
        QiitaDBOperationNotPermittedError
            If the job is already completed
        """
        with qdb.sql_connection.TRN:
            status = self.status
            if status == 'queued':
                self._set_status('running')
            elif status != 'running':
                raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                    "Can't execute heartbeat on job: already completed")
            sql = """UPDATE qiita.processing_job
                     SET heartbeat = %s
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [datetime.now(), self.id])
            qdb.sql_connection.TRN.execute()

    @property
    def step(self):
        """Returns the current step of the job

        Returns
        -------
        str
            The current step of the job
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT step
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            return qdb.sql_connection.TRN.execute_fetchlast()

    @step.setter
    def step(self, value):
        """Sets the current step of the job

        Parameters
        ----------
        value : str
            The new current step of the job

        Raises
        ------
        qiita_db.exceptions.QiitaDBOperationNotPermittedError
            If the status of the job is not 'running'
        """
        if self.status != 'running':
            raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                "Cannot change the step of a job whose status is not "
                "'running'")
        sql = """UPDATE qiita.processing_job
                 SET step = %s
                 WHERE processing_job_id = %s"""
        qdb.sql_connection.perform_as_transaction(sql, [value, self.id])

    @property
    def children(self):
        """The children jobs

        Returns
        -------
        generator of qiita_db.processing_job.ProcessingJob
            The children jobs
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT child_id
                     FROM qiita.parent_processing_job
                     WHERE parent_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            for jid in qdb.sql_connection.TRN.execute_fetchflatten():
                yield ProcessingJob(jid)

    @property
    def validator_jobs(self):
        """The validators of this job

        Returns
        -------
        generator of qiita_db.processing_job.ProcessingJob
            The validators of this job
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT validator_id
                     FROM qiita.processing_job_validator pjv
                     JOIN qiita.processing_job pj
                         ON pjv.validator_id = pj.processing_job_id
                     JOIN qiita.processing_job_status USING (
                        processing_job_status_id)
                     WHERE pjv.processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            for jid in qdb.sql_connection.TRN.execute_fetchflatten():
                yield ProcessingJob(jid)

    def _helper_update_children(self, new_map):
        ready = []
        sql = """SELECT command_parameters, pending
                 FROM qiita.processing_job
                 WHERE processing_job_id = %s"""
        sql_update = """UPDATE qiita.processing_job
                        SET command_parameters = %s,
                            pending = %s
                        WHERE processing_job_id = %s"""
        sql_link = """INSERT INTO qiita.artifact_processing_job
                        (artifact_id, processing_job_id)
                      VALUES (%s, %s)"""

        for c in self.children:
            qdb.sql_connection.TRN.add(sql, [c.id])
            params, pending = qdb.sql_connection.TRN.execute_fetchflatten()
            for pname, out_name in pending[self.id].items():
                a_id = new_map[out_name]
                params[pname] = str(a_id)
                del pending[self.id]
                # Link the input artifact with the child job
                qdb.sql_connection.TRN.add(sql_link, [a_id, c.id])

            # Force to insert a NULL in the DB if pending is empty
            pending = pending if pending else None
            qdb.sql_connection.TRN.add(sql_update,
                                       [dumps(params), pending, c.id])
            qdb.sql_connection.TRN.execute()

            if pending is None:
                # The child already has all the parameters
                # Add it to the ready list
                ready.append(c)

        return ready

    def _update_children(self, mapping):
        """Updates the children of the current job to populate the input params

        Parameters
        ----------
        mapping : dict of {int: int}
            The mapping between output parameter and artifact

        Returns
        -------
        list of qiita_db.processing_job.ProcessingJob
            The list of childrens that are ready to be submitted
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT command_output_id, name
                     FROM qiita.command_output
                     WHERE command_output_id IN %s"""
            sql_args = [tuple(mapping.keys())]
            qdb.sql_connection.TRN.add(sql, sql_args)
            res = qdb.sql_connection.TRN.execute_fetchindex()
            new_map = {name: mapping[oid] for oid, name in res}

        return self._helper_update_children(new_map)

    def _update_and_launch_children(self, mapping):
        """Updates the children of the current job to populate the input params

        Parameters
        ----------
        mapping : dict of {int: int}
            The mapping between output parameter and artifact
        """
        ready = self._update_children(mapping)
        # Submit all the children that already have all the input parameters
        for c in ready:
            if c.status in {'in_construction', 'waiting'}:
                c.submit()
                # some jobs create several children jobs/validators and this
                # can clog the submission process; giving it a second to
                # avoid this
                sleep(1)

    @property
    def outputs(self):
        """The outputs of the job

        Returns
        -------
        dict of {str: qiita_db.artifact.Artifact}
            The outputs of the job keyed by output name
        """
        with qdb.sql_connection.TRN:
            if self.status != 'success':
                raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                    "Can't return the outputs of a non-success job")

            sql = """SELECT artifact_id, name
                     FROM qiita.artifact_output_processing_job
                        JOIN qiita.command_output USING (command_output_id)
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            return {
                name: qdb.artifact.Artifact(aid)
                for aid, name in qdb.sql_connection.TRN.execute_fetchindex()}

    @property
    def processing_job_workflow(self):
        """The processing job workflow

        Returns
        -------
        ProcessingWorkflow
            The processing job workflow the job
        """
        with qdb.sql_connection.TRN:
            # Retrieve the workflow root jobs
            sql = """SELECT get_processing_workflow_roots
                     FROM qiita.get_processing_workflow_roots(%s)"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            res = qdb.sql_connection.TRN.execute_fetchindex()
            if res:
                sql = """SELECT processing_job_workflow_id
                         FROM qiita.processing_job_workflow_root
                         WHERE processing_job_id = %s"""
                qdb.sql_connection.TRN.add(sql, [res[0][0]])
                r = qdb.sql_connection.TRN.execute_fetchindex()
                return (qdb.processing_job.ProcessingWorkflow(r[0][0]) if r
                        else None)
            else:
                return None

    @property
    def pending(self):
        """A dictionary with the information about the predecessor jobs

        Returns
        -------
        dict
            A dict with {job_id: {parameter_name: output_name}}"""
        with qdb.sql_connection.TRN:
            sql = """SELECT pending
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            res = qdb.sql_connection.TRN.execute_fetchlast()
            return res if res is not None else {}

    @property
    def hidden(self):
        """Whether the job is hidden or not

        Returns
        -------
        bool
            Whether the jobs is hidden or not
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT hidden
                     FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            return qdb.sql_connection.TRN.execute_fetchlast()

    def hide(self):
        """Hides the job from the user

        Raises
        ------
        QiitaDBOperationNotPermittedError
            If the job is not in the error status
        """
        with qdb.sql_connection.TRN:
            status = self.status
            if status != 'error':
                raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                    'Only jobs in error status can be hidden. Current status: '
                    '%s' % status)
            sql = """UPDATE qiita.processing_job
                     SET hidden = %s
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [True, self.id])
            qdb.sql_connection.TRN.execute()

    @property
    def shape(self):
        """Number of samples, metadata columns and input size of this job

        Returns
        -------
        int, int, int
            Number of samples, metadata columns and input size. None means it
            couldn't be calculated
        """
        samples = None
        columns = None
        prep_info = None
        study_id = None
        analysis_id = None
        artifact = None
        input_size = None

        parameters = self.parameters.values
        QUIDError = qdb.exceptions.QiitaDBUnknownIDError

        if self.command.name == 'Validate':
            # Validate only has two options to calculate it's size: template (a
            # job that has a preparation linked) or analysis (is from an
            # analysis). However, 'template' can be present and be None
            if 'template' in parameters and parameters['template'] is not None:
                try:
                    PT = qdb.metadata_template.prep_template.PrepTemplate
                    prep_info = PT(parameters['template'])
                except QUIDError:
                    pass
                else:
                    study_id = prep_info.study_id
            elif 'analysis' in parameters:
                analysis_id = parameters['analysis']
        elif self.command.name == 'build_analysis_files':
            # build analysis is a special case because the analysis doesn't
            # exist yet
            sanalysis = qdb.analysis.Analysis(parameters['analysis']).samples
            samples = sum([len(sams) for sams in sanalysis.values()])
            # only count the biom files
            input_size = sum([fp['fp_size'] for aid in sanalysis
                              for fp in qdb.artifact.Artifact(aid).filepaths
                              if fp['fp_type'] == 'biom'])
            columns = self.parameters.values['categories']
            if columns is not None:
                columns = len(columns)
        elif self.command.software.name == 'Qiita':
            if self.command.name == 'delete_sample_or_column':
                MT = qdb.metadata_template
                _id = parameters['obj_id']
                try:
                    if parameters['obj_class'] == 'SampleTemplate':
                        obj = MT.sample_template.SampleTemplate(_id)
                    else:
                        obj = MT.prep_template.PrepTemplate(_id)
                    samples = len(obj)
                except QUIDError:
                    pass
            else:
                if 'study' in parameters:
                    study_id = parameters['study']
                elif 'study_id' in parameters:
                    study_id = parameters['study_id']
                elif 'analysis' in parameters:
                    analysis_id = parameters['analysis']
                elif 'analysis_id' in parameters:
                    analysis_id = parameters['analysis_id']
                elif 'artifact' in parameters:
                    try:
                        artifact = qdb.artifact.Artifact(
                            parameters['artifact'])
                    except QUIDError:
                        pass
        elif self.command.name == 'Sequence Processing Pipeline':
            body = self.parameters.values['sample_sheet']['body']
            samples = body.count('\r')
            stemp = body.count('\n')
            if stemp > samples:
                samples = stemp
        elif self.input_artifacts:
            artifact = self.input_artifacts[0]
            if artifact.artifact_type == 'BIOM':
                input_size = sum([fp['fp_size'] for a in self.input_artifacts
                                  for fp in a.filepaths
                                  if fp['fp_type'] == 'biom'])
            else:
                input_size = sum([fp['fp_size'] for a in self.input_artifacts
                                  for fp in a.filepaths])

        # if there is an artifact, then we need to get the study_id/analysis_id
        if artifact is not None:
            if artifact.study is not None:
                # only count samples in the prep template
                prep_info = artifact.prep_templates[0]
                study_id = prep_info.study_id
            elif artifact.analysis is not None:
                analysis_id = artifact.analysis.id

        # now retrieve the sample/columns based on study_id/analysis_id
        if study_id is not None:
            try:
                st = qdb.study.Study(study_id).sample_template
            except QUIDError:
                pass
            else:
                if prep_info is not None:
                    samples = len(prep_info)
                    columns = len(prep_info.categories) + len(st.categories)
                elif st is not None:
                    samples = len(st)
                    columns = len(st.categories)
        elif analysis_id is not None:
            try:
                analysis = qdb.analysis.Analysis(analysis_id)
            except qdb.exceptions.QiitaDBUnknownIDError:
                pass
            else:
                mfp = qdb.util.get_filepath_information(
                    analysis.mapping_file)['fullpath']
                samples, columns = pd.read_csv(
                    mfp, sep='\t', dtype=str).shape
                input_size = sum([fp['fp_size'] for aid in analysis.samples for
                                  fp in qdb.artifact.Artifact(aid).filepaths])

        return samples, columns, input_size

    @property
    def complete_processing_job(self):
        sql = """SELECT processing_job_id FROM qiita.software_command
                    JOIN qiita.processing_job USING (command_id)
                    WHERE name = 'complete_job' AND
                        command_parameters->>'job_id' = %s LIMIT 1"""
        with qdb.sql_connection.TRN:
            qdb.sql_connection.TRN.add(sql, [self.id])
            result = qdb.sql_connection.TRN.execute_fetchflatten()

        if result:
            return qdb.processing_job.ProcessingJob(result[0])
        return None

    @property
    def trace(self):
        """ Returns as a text array the full trace of the job, from itself
            to validators and complete jobs"""
        lines = [f'{self.id} [{self.external_id}] ({self.status}): '
                 f'{self.command.name} | {self.resource_allocation_info}']
        cjob = self.complete_processing_job
        if cjob is not None:
            lines.append(f'  {cjob.id} [{cjob.external_id}] ({cjob.status})| '
                         f'{cjob.resource_allocation_info}')
            vjob = self.release_validator_job
            if vjob is not None:
                lines.append(f'    {vjob.id} [{vjob.external_id}] '
                             f' ({vjob.status}) | '
                             f'{vjob.resource_allocation_info}')
        for v in self.validator_jobs:
            lines.append(f'     {v.id} [{v.external_id}] ({v.status}): '
                         f'{v.command.name} | {v.resource_allocation_info}')
            cjob = v.complete_processing_job
            if cjob is not None:
                lines.append(f'         {cjob.id} [{cjob.external_id}] '
                             f'({cjob.status}) | '
                             f'{cjob.resource_allocation_info}')
        return lines


class ProcessingWorkflow(qdb.base.QiitaObject):
    """Models a workflow defined by the user

    Parameters
    ----------
    user : qiita_db.user.User
        The user that modeled the workflow
    root : list of qiita_db.processing_job.ProcessingJob
        The first job in the workflow
    """
    _table = "processing_job_workflow"

    @classmethod
    def _common_creation_steps(cls, user, root_jobs, name=None):
        """Executes the common creation steps

        Parameters
        ----------
        user : qiita_db.user.User
            The user creating the workflow
        root_jobs : list of qiita_db.processing_job.ProcessingJob
            The root jobs of the workflow
        name : str, optional
            The name of the workflow. Default: generated from user's name
        """
        with qdb.sql_connection.TRN:
            # Insert the workflow in the processing_job_workflow table
            name = name if name else "%s's workflow" % user.info['name']
            sql = """INSERT INTO qiita.processing_job_workflow (email, name)
                     VALUES (%s, %s)
                     RETURNING processing_job_workflow_id"""
            qdb.sql_connection.TRN.add(sql, [user.email, name])
            w_id = qdb.sql_connection.TRN.execute_fetchlast()
            # Connect the workflow with it's initial set of jobs
            sql = """INSERT INTO qiita.processing_job_workflow_root
                        (processing_job_workflow_id, processing_job_id)
                     VALUES (%s, %s)"""
            sql_args = [[w_id, j.id] for j in root_jobs]
            qdb.sql_connection.TRN.add(sql, sql_args, many=True)
            qdb.sql_connection.TRN.execute()

        return cls(w_id)

    @classmethod
    def from_default_workflow(cls, user, dflt_wf, req_params, name=None,
                              force=False):
        """Creates a new processing workflow from a default workflow

        Parameters
        ----------
        user : qiita_db.user.User
            The user creating the workflow
        dflt_wf : qiita_db.software.DefaultWorkflow
            The default workflow
        req_params : dict of {qdb.software.Command: dict of {str: object}}
            The required parameters values for the source commands in the
            workflow, keyed by command. The inner dicts are keyed by
            parameter name.
        name : str, optional
            Name of the workflow. Default: generated from user's name
        force : bool
            Force creation on duplicated parameters

        Returns
        -------
        qiita_db.processing_job.ProcessingWorkflow
            The newly created workflow
        """
        with qdb.sql_connection.TRN:
            dflt_g = dflt_wf.graph

            # Find the roots of the workflow. That is, the nodes that do not
            # have a parent in the graph (in_degree = 0)
            in_degrees = dflt_g.in_degree()

            # We can potentially access this information from the nodes
            # multiple times, so caching in here
            # [0] in_degrees returns a tuple, where [0] is the element we want
            all_nodes = {}
            roots = {}

            for node, position in in_degrees:
                dp = node.default_parameter
                cmd = dp.command
                if position == 0:
                    roots[node] = (cmd, dp)
                all_nodes[node] = (cmd, dp)

            # Check that we have all the required parameters
            root_cmds = set(c for c, _ in roots.values())
            if root_cmds != set(req_params):
                error_msg = ['Provided required parameters do not match the '
                             'initial set of commands for the workflow.']
                missing = [c.name for c in root_cmds - set(req_params)]
                if missing:
                    error_msg.append(
                        ' Command(s) "%s" are missing the required parameter '
                        'set.' % ', '.join(missing))
                extra = [c.name for c in set(req_params) - root_cmds]
                if extra:
                    error_msg.append(
                        ' Paramters for command(s) "%s" have been provided, '
                        'but they are not the initial commands for the '
                        'workflow.' % ', '.join(extra))
                raise qdb.exceptions.QiitaDBError(''.join(error_msg))

            # Start creating the root jobs
            node_to_job = {
                n: ProcessingJob.create(
                    user,
                    qdb.software.Parameters.from_default_params(
                        p, req_params[c]), force)
                for n, (c, p) in roots.items()}
            root_jobs = node_to_job.values()

            # SQL used to create the edges between jobs
            sql = """INSERT INTO qiita.parent_processing_job
                        (parent_id, child_id)
                     VALUES (%s, %s)"""

            # Create the rest of the jobs. These are different form the root
            # jobs because they depend on other jobs to complete in order to be
            # submitted
            for n in nx.topological_sort(dflt_g):
                if n in node_to_job:
                    # We have already visited this node
                    # (because it is a root node)
                    continue

                cmd, dflt_params = all_nodes[n]
                job_req_params = {}
                parent_ids = []

                # Each incoming edge represents an artifact that is generated
                # by the source job of the edge
                for source, dest, data in dflt_g.in_edges(n, data=True):
                    # Retrieve the id of the parent job - it already exists
                    # because we are visiting the nodes in topological order
                    source_id = node_to_job[source].id
                    parent_ids.append(source_id)
                    # Get the connections between the job and the source
                    connections = data['connections'].connections
                    for out, in_param, _ in connections:
                        # We take advantage of the fact the parameters are
                        # stored in JSON to encode the name of the output
                        # artifact from the previous job
                        job_req_params[in_param] = [source_id, out]

                # At this point we should have all the requried parameters for
                # the current job, so create it
                new_job = ProcessingJob.create(
                    user, qdb.software.Parameters.from_default_params(
                        dflt_params, job_req_params), force)
                node_to_job[n] = new_job

                # Create the parent-child links in the DB
                sql_args = [[pid, new_job.id] for pid in parent_ids]
                qdb.sql_connection.TRN.add(sql, sql_args, many=True)

            return cls._common_creation_steps(user, root_jobs, name)

    @classmethod
    def from_scratch(cls, user, parameters, name=None, force=False):
        """Creates a new processing workflow from scratch

        Parameters
        ----------
        user : qiita_db.user.User
            The user creating the workflow
        parameters : qiita_db.software.Parameters
            The parameters of the first job in the workflow
        name : str, optional
            Name of the workflow. Default: generated from user's name
        force : bool
            Force creation on duplicated parameters

        Returns
        -------
        qiita_db.processing_job.ProcessingWorkflow
            The newly created workflow
        """
        job = ProcessingJob.create(user, parameters, force)
        return cls._common_creation_steps(user, [job], name)

    @property
    def name(self):
        """"The name of the workflow

        Returns
        -------
        str
            The name of the workflow
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT name
                     FROM qiita.processing_job_workflow
                     WHERE processing_job_workflow_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            return qdb.sql_connection.TRN.execute_fetchlast()

    @property
    def user(self):
        """The user that created the workflow

        Returns
        -------
        qdb.user.User
            The user that created the workflow
        """
        with qdb.sql_connection.TRN:
            sql = """SELECT email
                     FROM qiita.processing_job_workflow
                     WHERE processing_job_workflow_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            email = qdb.sql_connection.TRN.execute_fetchlast()
            return qdb.user.User(email)

    @property
    def graph(self):
        """Returns the graph of jobs that represent the workflow

        Returns
        -------
        networkx.DiGraph
            The graph representing the workflow
        """
        g = nx.DiGraph()
        with qdb.sql_connection.TRN:
            # Retrieve all graph workflow nodes
            sql = """SELECT parent_id, child_id
                     FROM qiita.get_processing_workflow_edges(%s)"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            edges = qdb.sql_connection.TRN.execute_fetchindex()
            nodes = {}
            if edges:
                nodes = {jid: ProcessingJob(jid)
                         for jid in set(chain.from_iterable(edges))}
                edges = [(nodes[s], nodes[d]) for s, d in edges]
                g.add_edges_from(edges)
            # It is possible that there are root jobs that doesn't have any
            # child, so they do not appear on edge list
            sql = """SELECT processing_job_id
                     FROM qiita.processing_job_workflow_root
                     WHERE processing_job_workflow_id = %s"""
            sql_args = [self.id]
            if nodes:
                sql += " AND processing_job_id NOT IN %s"
                sql_args.append(tuple(nodes))
            qdb.sql_connection.TRN.add(sql, sql_args)
            nodes = [
                ProcessingJob(jid)
                for jid in qdb.sql_connection.TRN.execute_fetchflatten()]
            g.add_nodes_from(nodes)

        return g

    def _raise_if_not_in_construction(self):
        """Raises an error if the workflow is not in construction

        Raises
        ------
        qiita_db.exceptions.QiitaDBOperationNotPermittedError
            If the workflow is not in construction
        """
        with qdb.sql_connection.TRN:
            # To know if the workflow is in construction or not it suffices
            # to look at the status of the root jobs
            sql = """SELECT DISTINCT processing_job_status
                     FROM qiita.processing_job_workflow_root
                        JOIN qiita.processing_job USING (processing_job_id)
                        JOIN qiita.processing_job_status
                            USING (processing_job_status_id)
                     WHERE processing_job_workflow_id = %s"""
            qdb.sql_connection.TRN.add(sql, [self.id])
            res = qdb.sql_connection.TRN.execute_fetchflatten()
            # If the above SQL query returns a single element and the value
            # is different from in construction, it means that all the jobs
            # in the workflow are in the same status and it is not
            # 'in_construction', hence raise the error. If the above SQL query
            # returns more than value (len(res) > 1) it means that the workflow
            # is no longer in construction cause some jobs have been submited
            # for processing. Note that if the above query doesn't retrun any
            # value, it means that no jobs are in the workflow and that means
            # that the workflow is in construction.
            if (len(res) == 1 and res[0] != 'in_construction') or len(res) > 1:
                # The workflow is no longer in construction, raise an error
                raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                    "Workflow not in construction")

    def add(self, dflt_params, connections=None, req_params=None,
            opt_params=None, force=False):
        """Adds a new job to the workflow

        Parameters
        ----------
        dflt_params : qiita_db.software.DefaultParameters
            The DefaultParameters object used
        connections : dict of {qiita_db.processing_job.ProcessingJob:
                               {str: str}}, optional
            Dictionary keyed by the jobs in which the new job depends on,
            and values is a dict mapping between source outputs and new job
            inputs
        req_params : dict of {str: object}, optional
            Any extra required parameter values, keyed by parameter name.
            Default: None, all the requried parameters are provided through
            the `connections` dictionary
        opt_params : dict of {str: object}, optional
            The optional parameters to change from the default set, keyed by
            parameter name. Default: None, use the values in `dflt_params`
        force : bool
            Force creation on duplicated parameters

        Raises
        ------
        qiita_db.exceptions.QiitaDBOperationNotPermittedError
            If the workflow is not in construction
        """
        with qdb.sql_connection.TRN:
            self._raise_if_not_in_construction()

            # checking that the new number of artifacts is not above
            # max_artifacts_in_workflow
            current_artifacts = sum(
                [len(j.command.outputs) for j in self.graph.nodes()])
            to_add_artifacts = len(dflt_params.command.outputs)
            total_artifacts = current_artifacts + to_add_artifacts
            max_artifacts = qdb.util.max_artifacts_in_workflow()
            if total_artifacts > max_artifacts:
                raise ValueError(
                    "Cannot add new job because it will create more "
                    f"artifacts (current: {current_artifacts} + new: "
                    f"{to_add_artifacts} = {total_artifacts}) that what is "
                    f"allowed in a single workflow ({max_artifacts})")

            if connections:
                # The new Job depends on previous jobs in the workflow
                req_params = req_params if req_params else {}
                # Loop through all the connections to add the relevant
                # parameters
                for source, mapping in connections.items():
                    source_id = source.id
                    for out, in_param in mapping.items():
                        req_params[in_param] = [source_id, out]

                new_job = ProcessingJob.create(
                    self.user, qdb.software.Parameters.from_default_params(
                        dflt_params, req_params, opt_params=opt_params), force)

                # SQL used to create the edges between jobs
                sql = """INSERT INTO qiita.parent_processing_job
                            (parent_id, child_id)
                         VALUES (%s, %s)"""
                sql_args = [[s.id, new_job.id] for s in connections]
                qdb.sql_connection.TRN.add(sql, sql_args, many=True)
                qdb.sql_connection.TRN.execute()
            else:
                # The new job doesn't depend on any previous job in the
                # workflow, so it is a new root job
                new_job = ProcessingJob.create(
                    self.user, qdb.software.Parameters.from_default_params(
                        dflt_params, req_params, opt_params=opt_params), force)
                sql = """INSERT INTO qiita.processing_job_workflow_root
                            (processing_job_workflow_id, processing_job_id)
                         VALUES (%s, %s)"""
                sql_args = [self.id, new_job.id]
                qdb.sql_connection.TRN.add(sql, sql_args)
                qdb.sql_connection.TRN.execute()

            return new_job

    def remove(self, job, cascade=False):
        """Removes a given job from the workflow

        Parameters
        ----------
        job : qiita_db.processing_job.ProcessingJob
            The job to be removed
        cascade : bool, optional
            If true, remove the also the input job's children. Default: False.

        Raises
        ------
        qiita_db.exceptions.QiitaDBOperationNotPermittedError
            If the workflow is not in construction
            If the job to be removed has children and `cascade` is `False`
        """
        with qdb.sql_connection.TRN:
            self._raise_if_not_in_construction()

            # Check if the given job has children
            children = list(job.children)
            if children:
                if not cascade:
                    raise qdb.exceptions.QiitaDBOperationNotPermittedError(
                        "Can't remove job '%s': it has children" % job.id)
                else:
                    # We need to remove all job's children, remove them first
                    # and then remove the current job
                    for c in children:
                        self.remove(c, cascade=True)

            # Remove any edges (it can only appear as a child)
            sql = """DELETE FROM qiita.parent_processing_job
                     WHERE child_id = %s"""
            qdb.sql_connection.TRN.add(sql, [job.id])

            # Remove as root job
            sql = """DELETE FROM qiita.processing_job_workflow_root
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [job.id])

            # Remove the input reference
            sql = """DELETE FROM qiita.artifact_processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [job.id])

            # Remove the job
            sql = """DELETE FROM qiita.processing_job
                     WHERE processing_job_id = %s"""
            qdb.sql_connection.TRN.add(sql, [job.id])

            qdb.sql_connection.TRN.execute()

    def submit(self):
        """Submits the workflow to execution

        Raises
        ------
        qiita_db.exceptions.QiitaDBOperationNotPermittedError
            If the workflow is not in construction
        """
        with qdb.sql_connection.TRN:
            self._raise_if_not_in_construction()

            g = self.graph
            # In order to avoid potential race conditions, we are going to set
            # all the children in 'waiting' status before submitting
            # the root nodes
            in_degrees = dict(g.in_degree())
            roots = []
            for job, degree in in_degrees.items():
                if degree == 0:
                    roots.append(job)
                else:
                    job._set_status('waiting')

            for job in roots:
                job.submit()