• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

4dn-dcic / dcicwrangling / 13682489429

05 Mar 2025 06:03PM UTC coverage: 33.8%. Remained the same
13682489429

push

github

web-flow
Merge pull request #117 from 4dn-dcic/ajs_upd_dependencies

Ajs upd dependencies

2 of 2 new or added lines in 1 file covered. (100.0%)

22 existing lines in 14 files now uncovered.

1256 of 3716 relevant lines covered (33.8%)

2.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/scripts/item_loader.py
1
#!/usr/bin/env python3
2

3
import argparse
import json
import sys
from datetime import datetime
from uuid import uuid4

from dcicutils.ff_utils import (
    get_metadata,
    patch_metadata,
    post_metadata,
)

from functions import script_utils as scu
from functions.script_utils import create_ff_arg_parser
×
UNCOV
14
''' Will attempt to load data from a file into the database using the load_data
    endpoint if it can, or post/patch_metadata if not.

    The file can be a simple list of json items, in which case you need to
    specify an item type with the --itype option (a file created by
    generate_ontology is like this), or the file can contain a dictionary
    with item types as keys and lists of item jsons as values.

    If the --as-file option is used the json items must contain a uuid and
    the endpoint will attempt to read the file from the request - there is
    no ordering, so if there are dependencies on linked items those items
    must either already be loaded or be present in the file.
    WARNING: currently only works locally or if the file is uploaded as part
    of the app file system
'''
25

26
# Item types listed in load order so that items are posted before the items
# that link to them (users/awards/labs first; experiment sets and workflow
# runs last).  main() sorts the input dict's keys by index into this list,
# so any item type loaded through this script must appear here.
ORDER = [
    'user',
    'award',
    'lab',
    'static_section',
    'higlass_view_config',
    'page',
    'ontology',
    'ontology_term',
    'file_format',
    'badge',
    'organism',
    'genomic_region',
    'gene',
    'bio_feature',
    'target',
    'imaging_path',
    'publication',
    'publication_tracking',
    'document',
    'image',
    'vendor',
    'construct',
    'modification',
    'protocol',
    'sop_map',
    'biosample_cell_culture',
    'individual_human',
    'individual_mouse',
    'individual_fly',
    'individual_chicken',
    'biosource',
    'antibody',
    'enzyme',
    'treatment_rnai',
    'treatment_agent',
    'biosample',
    'quality_metric_fastqc',
    'quality_metric_bamqc',
    'quality_metric_pairsqc',
    'quality_metric_dedupqc_repliseq',
    'quality_metric_chipseq',
    'quality_metric_atacseq',
    'microscope_setting_d1',
    'microscope_setting_d2',
    'microscope_setting_a1',
    'microscope_setting_a2',
    'file_fastq',
    'file_processed',
    'file_reference',
    'file_calibration',
    'file_microscopy',
    'file_set',
    'file_set_calibration',
    'file_set_microscope_qc',
    'file_vistrack',
    'experiment_hi_c',
    'experiment_capture_c',
    'experiment_repliseq',
    'experiment_atacseq',
    'experiment_chiapet',
    'experiment_damid',
    'experiment_seq',
    'experiment_tsaseq',
    'experiment_mic',
    'experiment_set',
    'experiment_set_replicate',
    'data_release_update',
    'software',
    'analysis_step',
    'workflow',
    'workflow_mapping',
    'workflow_run_sbg',
    'workflow_run_awsem'
]
101

102

103
def get_args():  # pragma: no cover
    """Build and parse the command-line arguments for the item loader.

    Inherits the shared connection/update options (key, keyfile, env,
    dbupdate) from create_ff_arg_parser() via the parents mechanism.

    :returns: argparse.Namespace with infile, itype, id_field, as_file
              plus the inherited options
    """
    parser = argparse.ArgumentParser(
        description='Given a file of item jsons try to load into database',
        parents=[create_ff_arg_parser()],
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument('infile',
                        help="the datafile containing object data to import")
    parser.add_argument('--itype',
                        help="The item type to load if not specified in the file by dict key")
    parser.add_argument('--id-field',
                        # fixed typo in help text: "to used as" -> "to use as"
                        help="Field name to use as identifier for items (all item types in file)")
    parser.add_argument('--as-file',
                        default=False,
                        action='store_true',
                        help="Will attempt to load and process the file directly in the request. "
                             "This currently only works locally or if the file has been uploaded to "
                             "the apps file system")
    args = parser.parse_args()
    return args
124

125

126
def patch_jsons(auth, to_patch):
    """Patch each item json in *to_patch* against its own uuid.

    Failures are printed and skipped so one bad item does not stop
    the rest of the batch.
    """
    for entry in to_patch:
        entry_uuid = entry.get('uuid')
        try:
            patch_metadata(entry, entry_uuid, auth)
        except Exception as exc:
            print(exc)
×
133

134

135
def load_json(auth, itype, item_list, chunk_size=50):
    """Post *item_list* to the load_data endpoint in chunks of *chunk_size*.

    A falsy chunk_size (0 or None) sends the whole list in one request.
    Each chunk is wrapped as {'store': {itype: chunk}, 'overwrite': True};
    failures are printed and loading continues with the next chunk.
    """
    total = len(item_list)
    start = 0
    while start < total:
        # min() caps the final (possibly short) chunk; no chunking if falsy
        end = min(start + chunk_size, total) if chunk_size else total
        batch = item_list[start:end]
        payload = {'store': {itype: batch}, 'overwrite': True}
        if 'localhost' in auth.get('server', ''):
            # local dev servers need an explicit config for load_data
            payload['config_uri'] = 'development.ini'
        try:
            post_metadata(payload, 'load_data', auth)
        except Exception as exc:
            print("PROBLEM WITH POST")
            print(exc)
        start = end
×
152

153

154
def load_file(auth, itype, filename):
    """Load items via the load_data endpoint, reading the file server-side.

    The server opens *filename* itself, so this only works locally or when
    the file is already on the app's file system.

    :param auth: ff auth dict (server/key) passed through to post_metadata
    :param itype: item type for everything in the file
    :param filename: path to the file, as seen by the server
    :raises: whatever post_metadata raises on a failed request
    """
    payload = {'in_file': filename, 'overwrite': True, 'itype': itype}
    if 'localhost' in auth.get('server', ''):
        # local dev servers need an explicit config for load_data
        payload['config_uri'] = 'development.ini'
    # original wrapped this in `try/except Exception: raise`, which is a
    # no-op re-raise; errors propagate identically without it
    post_metadata(payload, 'load_data', auth)
×
162

163

164
def get_item(val, auth):
    """Return the uuid of the item identified by *val*, or None on failure.

    Any error from get_metadata (item not found, bad identifier, request
    problem) is treated as "does not exist".
    """
    try:
        return get_metadata(val, auth).get('uuid')
    # narrowed from bare `except:` so SystemExit/KeyboardInterrupt still
    # propagate; lookup remains deliberately best-effort
    except Exception:
        return None
×
169

170

171
def check_for_existing(item, itype, idfields, auth):
    """Return the uuid of an existing item matching any identifying field.

    For the 'aliases' field every alias in the list is tried directly;
    any other field value is looked up as '<itype>/<value>'.  Returns the
    first uuid found, otherwise None.
    """
    found = None
    for field in idfields:
        value = item.get(field)
        if not value:
            continue
        # aliases is already a list of lookup keys; other fields need
        # the item-type prefix to form a resource path
        candidates = value if field == 'aliases' else [itype + '/' + value]
        for candidate in candidates:
            found = get_item(candidate, auth)
            if found:
                return found
    return found
×
187

188

189
def main():  # pragma: no cover
    """Load item jsons from a file into the database.

    Either hands the file path to the server (--as-file) or reads the
    json locally, sorts item types into dependency order, splits items
    into new (post via load_data) vs existing (patch), and applies them.
    Without --dbupdate this is a dry run that only reports counts.
    """
    start = datetime.now()
    print(str(start))
    args = get_args()
    # NOTE(review): `scu` must be functions.script_utils imported under that
    # alias - confirm the import block binds it
    auth = scu.authenticate(key=args.key, keyfile=args.keyfile, env=args.env)
    print('working on ', auth.get('server'))
    if args.as_file:
        # server-side file load; no local parsing or existence checks
        if not args.dbupdate:
            print("DRY RUN - use --dbupdate to update the database")
        else:
            try:
                load_file(auth, args.itype, args.infile)
            except Exception as e:
                print(e)
    else:
        with open(args.infile) as ifile:
            item_store = json.loads(ifile.read())
            # file must be a dict keyed by item type, or a plain list
            # accompanied by --itype (which we wrap into a dict here)
            if not args.itype:
                if not isinstance(item_store, dict):
                    print("File is not in correct format")
                    sys.exit(1)
            else:
                if not isinstance(item_store, list):
                    print("File is not in correct format")
                    sys.exit(1)
                item_store = {args.itype: item_store}
            # process item types in dependency order (raises ValueError if
            # a type is missing from ORDER)
            for itype, items in sorted(item_store.items(), key=lambda x: ORDER.index(x[0])):
                if not args.dbupdate:
                    print('DRY RUN - would try to load {} {} items'.format(len(items), itype))
                    continue
                # identifying fields: explicit --id-field wins, otherwise
                # pull identifyingProperties from the item type's schema
                if args.id_field:
                    identifiers = [args.id_field]
                else:
                    schema_path = 'profiles/' + itype + '.json'
                    schema_info = get_metadata(schema_path, auth)
                    identifiers = schema_info.get('identifyingProperties')
                # checking to see if an item exists
                # if no can use load_data endpoint
                # if yes do it the old fashioned way
                to_patch = []
                to_post = []
                for item in items:
                    uid = item.get('uuid')
                    if uid:
                        exists = get_item(uid, auth)
                        if exists:  # try a patch
                            to_patch.append(item)
                        else:
                            to_post.append(item)
                    else:
                        # no uuid in the json - search by identifying fields,
                        # minting a fresh uuid if nothing matches
                        uid = check_for_existing(item, itype, identifiers, auth)
                        if uid:  # try a patch
                            item['uuid'] = uid
                            to_patch.append(item)
                        else:
                            uid = str(uuid4())
                            item['uuid'] = uid
                            to_post.append(item)
                if to_post:
                    load_json(auth, itype, to_post, chunk_size=1000)
                if to_patch:
                    patch_jsons(auth, to_patch)
    stop = datetime.now()
    print(str(stop))
253

254

255
# script entry point
if __name__ == '__main__':  # pragma: no cover
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc