6489602581

Committed 12 Oct 2023 12:25AM UTC coverage: 45.573% (+2.8%) from 42.772%

Build # 6489602581

Build Type

push

github

Committed by

web-flow

Commit Message

Merge pull request #363 from jo-basevi/358-date-based-frequency

Add support for date-based restart frequency

Run Details

111 of 147 new or added lines in 10 files covered. (75.51%)

2 existing lines in 1 file now uncovered.

1580 of 3467 relevant lines covered (45.57%)

1.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

16.46

/payu/subcommands/run_cmd.py

import os
import argparse

from payu import cli
from payu.experiment import Experiment
from payu.laboratory import Laboratory
import payu.subcommands.args as args
from payu import fsops
from payu.manifest import Manifest

# Subcommand name and descriptive keyword arguments (presumably consumed by
# payu's command-line front end when registering this subcommand -- confirm
# against the caller).
title = 'run'
parameters = dict(description='Run the model experiment')

# Argument specifications accepted by this subcommand.  runscript() reads the
# 'flags' and 'parameters' keys of each entry to build its argparse parser.
arguments = [
    args.model,
    args.config,
    args.initial,
    args.nruns,
    args.laboratory,
    args.reproduce,
    args.force,
    args.force_prune_restarts,
]


def runcmd(model_type, config_path, init_run, n_runs, lab_path,
           reproduce=False, force=False, force_prune_restarts=False):
    """Assemble the job resource request for a run and submit it.

    The configuration read from ``config_path`` is completed with defaults
    for the queue, the CPU count (rounded up to whole nodes when the request
    spans more than one node) and the memory, and is then passed to
    ``cli.submit_job`` as a ``payu-run`` job.

    Args:
        model_type: Not used by this function.
        config_path: Passed to ``fsops.read_config``.
        init_run: Forwarded to ``cli.set_env_vars``.
        n_runs: Forwarded to ``cli.set_env_vars``.
        lab_path: Forwarded to ``cli.set_env_vars``.
        reproduce: Forwarded to ``cli.set_env_vars``.
        force: Forwarded to ``cli.set_env_vars``.
        force_prune_restarts: Forwarded to ``cli.set_env_vars``.

    Raises:
        AssertionError: If the configured ``npernode`` is larger than the
            platform node size.
    """

    # Get job submission configuration
    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run=init_run,
                                n_runs=n_runs,
                                lab_path=lab_path,
                                reproduce=reproduce,
                                force=force,
                                force_prune_restarts=force_prune_restarts)

    # Set the queue
    # NOTE: Maybe force all jobs on the normal queue
    if 'queue' not in pbs_config:
        pbs_config['queue'] = 'normal'

    # TODO: Create drivers for servers
    # Node geometry; the defaults describe a 48-CPU node with 192 units of
    # memory (formatted as GB further down).
    platform = pbs_config.get('platform', {})
    max_cpus_per_node = platform.get('nodesize', 48)
    max_ram_per_node = platform.get('nodemem', 192)

    # Adjust the CPUs for any model-specific settings
    # TODO: Incorporate this into the Model driver
    mask_table = pbs_config.get('mask_table', False)
    if mask_table:

        # Check if a mask table exists
        # TODO: Is control_path defined at this stage?
        # NOTE: the file name found here is not used yet; this is a
        # placeholder for the unfinished mask-table CPU adjustment.
        mask_table_fname = None
        for fname in os.listdir(os.curdir):
            if fname.startswith('mask_table'):
                mask_table_fname = fname

        # TODO TODO

    if 'ncpureq' in pbs_config:
        # Hard override of CPU request
        n_cpus_request = pbs_config.get('ncpureq')

    elif 'submodels' in pbs_config and 'ncpus' not in pbs_config:
        # No top-level ncpus: request the total of the submodel CPU counts
        n_cpus_request = sum(model_config.get('ncpus', 0)
                             for model_config in pbs_config['submodels'])

    else:
        n_cpus_request = pbs_config.get('ncpus', 1)

    n_cpus = n_cpus_request
    n_cpus_per_node = pbs_config.get('npernode', max_cpus_per_node)

    assert n_cpus_per_node <= max_cpus_per_node, (
        'npernode ({0}) exceeds the node size ({1})'
        ''.format(n_cpus_per_node, max_cpus_per_node)
    )

    node_misalignment = n_cpus % max_cpus_per_node != 0
    node_increase = n_cpus_per_node < max_cpus_per_node

    # Increase the CPUs to accommodate the cpu-per-node request
    if n_cpus > max_cpus_per_node and (node_increase or node_misalignment):

        # Number of requested nodes (ceiling of n_cpus / n_cpus_per_node)
        n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node

        # Increase CPU request to match the effective node request
        n_cpus = max_cpus_per_node * n_nodes
        n_inert_cpus = n_cpus - n_cpus_request

        print('payu: warning: Job request includes {n} unused CPUs.'
              ''.format(n=n_inert_cpus))

        # Update the ncpus field in the config
        if n_cpus != n_cpus_request:
            print('payu: warning: CPU request increased from {n_req} to {n}'
                  ''.format(n_req=n_cpus_request, n=n_cpus))

    # Update the (possibly unchanged) value of ncpus
    pbs_config['ncpus'] = n_cpus

    # Set memory to use the complete node if unspecified
    pbs_mem = pbs_config.get('mem')
    if not pbs_mem:
        if n_cpus > max_cpus_per_node:
            # n_cpus is a whole number of nodes at this point
            pbs_mem = (n_cpus // max_cpus_per_node) * max_ram_per_node
        else:
            # Multiply before dividing: flooring the per-CPU share first
            # under-requests memory whenever nodemem is not a multiple of
            # nodesize (and yields 0 when nodemem < nodesize), so a full
            # single node would get less than the per-node amount used in
            # the multi-node branch above.
            pbs_mem = (n_cpus * max_ram_per_node) // max_cpus_per_node

        pbs_config['mem'] = '{0}GB'.format(pbs_mem)

    cli.submit_job('payu-run', pbs_config, pbs_vars)


def runscript():
    """Execute one or more model runs inside the current job.

    Command-line options are parsed from the module-level ``arguments``
    list.  A ``Laboratory`` and an ``Experiment`` are built from them, and
    the experiment is set up, run and archived repeatedly: at most
    ``runspersub`` times (read from the experiment config, default 1) and
    never after ``expt.n_runs`` has reached zero.  If ``expt.n_runs`` is
    still positive once the loop ends, ``expt.resubmit()`` is called.
    """

    cli_parser = argparse.ArgumentParser()
    for spec in arguments:
        cli_parser.add_argument(*spec['flags'], **spec['parameters'])

    run_args = cli_parser.parse_args()

    lab = Laboratory(
        run_args.model_type,
        run_args.config_path,
        run_args.lab_path,
    )
    expt = Experiment(lab, reproduce=run_args.reproduce, force=run_args.force)

    n_runs_per_submit = expt.config.get('runspersub', 1)
    progress_line = 'nruns: {0} nruns_per_submit: {1} subrun: {2}'

    subrun = 1
    while True:

        print(progress_line.format(expt.n_runs, n_runs_per_submit, subrun))

        expt.setup()
        expt.run()
        expt.archive(force_prune_restarts=run_args.force_prune_restarts)

        # Finished runs
        if expt.n_runs == 0:
            break

        # Stop once this submission has used up its allowance of sub-runs
        if not (n_runs_per_submit > 1 and subrun < n_runs_per_submit):
            break

        # Still looping, so the run counter has to be advanced by hand
        expt.counter += 1

        # Start from a fresh manifest so the restart manifest is cleared.
        # The reproduce flag is deliberately not carried over: it makes no
        # sense on subsequent runs.
        manifest_config = expt.config.get('manifest', {})
        expt.manifest = Manifest(manifest_config, reproduce=False)
        expt.set_output_paths()

        # Does not make sense to reproduce a multiple run.
        # Take care of this with argument processing?
        expt.reproduce = False

        subrun += 1

    if expt.n_runs > 0:
        expt.resubmit()

1	import os	3✔
2	import argparse	3✔
3
4	from payu import cli	3✔
5	from payu.experiment import Experiment	3✔
6	from payu.laboratory import Laboratory	3✔
7	import payu.subcommands.args as args	3✔
8	from payu import fsops	3✔
9	from payu.manifest import Manifest	3✔
10
11	title = 'run'	3✔
12	parameters = {'description': 'Run the model experiment'}	3✔
13
14	arguments = [args.model, args.config, args.initial, args.nruns,	3✔
15	args.laboratory, args.reproduce, args.force,
16	args.force_prune_restarts]
17
18
19	def runcmd(model_type, config_path, init_run, n_runs, lab_path,	3✔
20	reproduce=False, force=False, force_prune_restarts=False):
21
22	# Get job submission configuration
23	pbs_config = fsops.read_config(config_path)	×
24	pbs_vars = cli.set_env_vars(init_run=init_run,	×
25	n_runs=n_runs,
26	lab_path=lab_path,
27	reproduce=reproduce,
28	force=force,
29	force_prune_restarts=force_prune_restarts)
30
31	# Set the queue
32	# NOTE: Maybe force all jobs on the normal queue
33	if 'queue' not in pbs_config:	×
34	pbs_config['queue'] = 'normal'	×
35
36	# TODO: Create drivers for servers
37	platform = pbs_config.get('platform', {})	×
38	max_cpus_per_node = platform.get('nodesize', 48)	×
39	max_ram_per_node = platform.get('nodemem', 192)	×
40
41	# Adjust the CPUs for any model-specific settings
42	# TODO: Incorporate this into the Model driver
43	mask_table = pbs_config.get('mask_table', False)	×
44	if mask_table:	×
45
46	# Check if a mask table exists
47	# TODO: Is control_path defined at this stage?
48	mask_table_fname = None	×
49	for fname in os.listdir(os.curdir):	×
50	if fname.startswith('mask_table'):	×
51	mask_table_fname = fname	×
52
53	# TODO TODO
54
55	if 'ncpureq' in pbs_config:	×
56	# Hard override of CPU request
57	n_cpus_request = pbs_config.get('ncpureq')	×
58
59	elif 'submodels' in pbs_config and 'ncpus' not in pbs_config:	×
60	# Increase the cpu request to match a complete node
61
62	n_cpus_request = 0	×
63	submodel_configs = pbs_config['submodels']	×
64	for model_config in submodel_configs:	×
65	n_cpus_request += model_config.get('ncpus', 0)	×
66
67	else:
68	n_cpus_request = pbs_config.get('ncpus', 1)	×
69
70	n_cpus = n_cpus_request	×
71	n_cpus_per_node = pbs_config.get('npernode', max_cpus_per_node)	×
72
73	assert n_cpus_per_node <= max_cpus_per_node	×
74
75	node_misalignment = n_cpus % max_cpus_per_node != 0	×
76	node_increase = n_cpus_per_node < max_cpus_per_node	×
77
78	# Increase the CPUs to accommodate the cpu-per-node request
79	if n_cpus > max_cpus_per_node and (node_increase or node_misalignment):	×
80
81	# Number of requested nodes
82	n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node	×
83	n_cpu_request = max_cpus_per_node * n_nodes	×
84	n_inert_cpus = n_cpu_request - n_cpus	×
85
86	print('payu: warning: Job request includes {n} unused CPUs.'	×
87	''.format(n=n_inert_cpus))
88
89	# Increase CPU request to match the effective node request
90	n_cpus = max_cpus_per_node * n_nodes	×
91
92	# Update the ncpus field in the config
93	if n_cpus != n_cpus_request:	×
94	print('payu: warning: CPU request increased from {n_req} to {n}'	×
95	''.format(n_req=n_cpus_request, n=n_cpus))
96
97	# Update the (possibly unchanged) value of ncpus
98	pbs_config['ncpus'] = n_cpus	×
99
100	# Set memory to use the complete node if unspecified
101	pbs_mem = pbs_config.get('mem')	×
102	if not pbs_mem:	×
103	if n_cpus > max_cpus_per_node:	×
104	pbs_mem = (n_cpus // max_cpus_per_node) * max_ram_per_node	×
105	else:
106	pbs_mem = n_cpus * (max_ram_per_node // max_cpus_per_node)	×
107
108	pbs_config['mem'] = '{0}GB'.format(pbs_mem)	×
109
110	cli.submit_job('payu-run', pbs_config, pbs_vars)	×
111
112
113	def runscript():	3✔
114
115	parser = argparse.ArgumentParser()	×
116	for arg in arguments:	×
117	parser.add_argument(*arg['flags'], **arg['parameters'])	×
118
119	run_args = parser.parse_args()	×
120
121	lab = Laboratory(run_args.model_type, run_args.config_path,	×
122	run_args.lab_path)
123	expt = Experiment(lab, reproduce=run_args.reproduce, force=run_args.force)	×
124
125	n_runs_per_submit = expt.config.get('runspersub', 1)	×
126	subrun = 1	×
127
128	while True:	×
129
130	print('nruns: {0} nruns_per_submit: {1} subrun: {2}'	×
131	''.format(expt.n_runs, n_runs_per_submit, subrun))
132
133	expt.setup()	×
134	expt.run()	×
NEW 135	expt.archive(force_prune_restarts=run_args.force_prune_restarts)	×
136
137	# Finished runs
138	if expt.n_runs == 0:	×
139	break	×
140
141	# Need to manually increment the run counter if still looping
142	if n_runs_per_submit > 1 and subrun < n_runs_per_submit:	×
143	expt.counter += 1	×
144	# Re-initialize manifest: important to clear out restart manifest
145	# note no attempt to preserve reproduce flag, it makes no sense
146	# to on subsequent runs
147	expt.manifest = Manifest(expt.config.get('manifest', {}),	×
148	reproduce=False)
149	expt.set_output_paths()	×
150	# Does not make sense to reproduce a multiple run.
151	# Take care of this with argument processing?
152	expt.reproduce = False	×
153	else:
154	break	×
155
156	subrun += 1	×
157
158	if expt.n_runs > 0:	×
159	expt.resubmit()	×

payu-org / payu / 6489602581

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous