4dn-dcic / dcicwrangling / 13682489429

05 Mar 2025 06:03PM UTC · coverage: 33.8% (remained the same)

push · github · web-flow

Merge pull request #117 from 4dn-dcic/ajs_upd_dependencies

Ajs upd dependencies

2 of 2 new or added lines in 1 file covered (100.0%).
22 existing lines in 14 files are now uncovered.
1256 of 3716 relevant lines covered (33.8%).
2.02 hits per line.

Source File

/scripts/load_items_json.py (covered: 0.0%)

#!/usr/bin/env python3

import sys
import argparse
import json
from datetime import datetime
from uuid import uuid4
from dcicutils.ff_utils import (
    get_authentication_with_server,
    post_metadata,
    get_metadata,
    patch_metadata,
)
from functions.script_utils import create_ff_arg_parser, authenticate

''' Will attempt to load data from a file into the database using the load_data endpoint if it can,
    or post/patch_metadata if not.
    The file can be a simple list of json items, in which case you need to specify an item type
    with the --itype option (a file created by generate_ontology is like this), or the file can
    contain a dictionary with item types as keys and lists of jsons as values.

    If the --as-file option is used the json items must contain a uuid and the endpoint will
    attempt to read the file from the request - no ordering, and if there are dependencies on
    linked items those items must either already be loaded or be present in the file.
    WARNING: currently only works locally or if the file is uploaded as part of the app's file system
'''

ORDER = [
    'user',
    'award',
    'lab',
    'static_section',
    'higlass_view_config',
    'page',
    'ontology',
    'ontology_term',
    'file_format',
    'badge',
    'organism',
    'genomic_region',
    'gene',
    'bio_feature',
    'target',
    'imaging_path',
    'publication',
    'publication_tracking',
    'document',
    'image',
    'vendor',
    'construct',
    'modification',
    'protocol',
    'sop_map',
    'biosample_cell_culture',
    'individual_human',
    'individual_mouse',
    'individual_fly',
    'individual_chicken',
    'biosource',
    'antibody',
    'enzyme',
    'treatment_rnai',
    'treatment_agent',
    'biosample',
    'quality_metric_fastqc',
    'quality_metric_bamqc',
    'quality_metric_pairsqc',
    'quality_metric_dedupqc_repliseq',
    'quality_metric_chipseq',
    'quality_metric_atacseq',
    'microscope_setting_d1',
    'microscope_setting_d2',
    'microscope_setting_a1',
    'microscope_setting_a2',
    'file_fastq',
    'file_processed',
    'file_reference',
    'file_calibration',
    'file_microscopy',
    'file_set',
    'file_set_calibration',
    'file_set_microscope_qc',
    'file_vistrack',
    'experiment_hi_c',
    'experiment_capture_c',
    'experiment_repliseq',
    'experiment_atacseq',
    'experiment_chiapet',
    'experiment_damid',
    'experiment_seq',
    'experiment_tsaseq',
    'experiment_mic',
    'experiment_set',
    'experiment_set_replicate',
    'data_release_update',
    'software',
    'analysis_step',
    'workflow',
    'workflow_mapping',
    'workflow_run_sbg',
    'workflow_run_awsem'
]


def get_args():  # pragma: no cover
    parser = argparse.ArgumentParser(
        description='Given a file of item jsons try to load into database',
        parents=[create_ff_arg_parser()],
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument('infile',
                        help="the datafile containing json formatted items")
    # the rest of the script expects a single item type via args.itype
    parser.add_argument('--itype',
                        help="The item type to load if not specified in the file by store key(s)")
    parser.add_argument('--id-field',
                        help="Field name to use as identifier for items (all item types in file)")
    parser.add_argument('--as-file',
                        default=False,
                        action='store_true',
                        help="Will attempt to load and process the file directly in the request. "
                             "This currently only works locally or if the file has been uploaded to "
                             "the app's file system")
    args = parser.parse_args()
    return args


def patch_jsons(auth, to_patch):
    # patch each existing item individually using its uuid as the identifier
    for item in to_patch:
        uid = item.get('uuid')
        try:
            patch_metadata(item, uid, auth)
        except Exception as e:
            print(e)


def load_json(auth, itype, item_list, chunk_size=50):
    # post items of a single type to the load_data endpoint in chunks of chunk_size
    list_length = len(item_list)
    curr_pos = 0
    while curr_pos < list_length:
        slice_for = chunk_size if (chunk_size and chunk_size <= (list_length - curr_pos)) else list_length - curr_pos
        new_end = curr_pos + slice_for
        chunk = item_list[curr_pos: new_end]
        store = {itype: chunk}
        payload = {'store': store, 'overwrite': True}
        if 'localhost' in auth.get('server', ''):
            payload['config_uri'] = 'development.ini'
        try:
            post_metadata(payload, 'load_data', auth)
        except Exception as e:
            print("PROBLEM WITH POST")
            print(e)
        curr_pos = new_end


def load_file(auth, itype, filename):
    # ask the load_data endpoint to read the file itself rather than sending items in the request
    payload = {'in_file': filename, 'overwrite': True, 'itype': itype}
    if 'localhost' in auth.get('server', ''):
        payload['config_uri'] = 'development.ini'
    try:
        post_metadata(payload, 'load_data', auth)
    except Exception:
        raise


def get_item(val, auth):
    # return the uuid of an existing item, or None if the lookup fails
    try:
        return get_metadata(val, auth).get('uuid')
    except Exception:
        return None


def check_for_existing(item, itype, idfields, auth):
    # look for an existing item by any of its identifying properties and return its uuid if found
    uid = None
    for ifield in idfields:
        id2chk = item.get(ifield)
        if id2chk:
            if ifield == 'aliases':
                for a in id2chk:
                    uid = get_item(a, auth)
                    if uid:
                        return uid
            else:
                chkid = itype + '/' + id2chk
                uid = get_item(chkid, auth)
                if uid:
                    return uid
    return uid


def main():  # pragma: no cover
    start = datetime.now()
    print(str(start))
    args = get_args()
    auth = authenticate(key=args.key, keyfile=args.keyfile, env=args.env)
    print('working on ', auth.get('server'))
    if args.as_file:
        if not args.dbupdate:
            print("DRY RUN - use --dbupdate to update the database")
        else:
            try:
                load_file(auth, args.itype, args.infile)
            except Exception as e:
                print(e)
    else:
        with open(args.infile) as ifile:
            item_store = json.loads(ifile.read())
            if not args.itype:
                if not isinstance(item_store, dict):
                    print("File is not in correct format")
                    sys.exit(1)
            else:
                if not isinstance(item_store, list):
                    print("File is not in correct format")
                    sys.exit(1)
                item_store = {args.itype: item_store}
            for itype, items in sorted(item_store.items(), key=lambda x: ORDER.index(x[0])):
                if not args.dbupdate:
                    print('DRY RUN - would try to load {} {} items'.format(len(items), itype))
                    continue
                if args.id_field:
                    identifiers = [args.id_field]
                else:
                    schema_path = 'profiles/' + itype + '.json'
                    schema_info = get_metadata(schema_path, auth)
                    identifiers = schema_info.get('identifyingProperties')
                # check whether each item already exists:
                # if not, it can go through the load_data endpoint;
                # if it does, patch it the old fashioned way
                to_patch = []
                to_post = []
                for item in items:
                    uid = item.get('uuid')
                    if uid:
                        exists = get_item(uid, auth)
                        if exists:  # try a patch
                            to_patch.append(item)
                        else:
                            to_post.append(item)
                    else:
                        uid = check_for_existing(item, itype, identifiers, auth)
                        if uid:  # try a patch
                            item['uuid'] = uid
                            to_patch.append(item)
                        else:
                            uid = str(uuid4())
                            item['uuid'] = uid
                            to_post.append(item)
                if to_post:
                    load_json(auth, itype, to_post, chunk_size=1000)
                if to_patch:
                    patch_jsons(auth, to_patch)
    stop = datetime.now()
    print(str(stop))


if __name__ == '__main__':  # pragma: no cover
    main()
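
The docstring at the top of the script describes two accepted input shapes: a "store" dictionary keyed by item type, or a plain list of items of a single type passed together with --itype. Below is a minimal sketch of both shapes as Python literals; the item types (lab, award, ontology_term) come from the ORDER list above, while the file names and field values are invented purely for illustration.

    # Sketch of the two input-file shapes load_items_json.py accepts.
    # Field values are made-up examples; only the overall shape matters.
    import json

    # store-style file: item types as keys, lists of item jsons as values (no --itype needed)
    store_style = {
        "lab": [{"name": "example-lab", "title": "Example Lab"}],
        "award": [{"name": "example-award"}],
    }

    # plain-list file: items of one type; the type is given on the command line,
    # e.g.  load_items_json.py ontology_terms.json --itype ontology_term
    list_style = [
        {"term_id": "EXAMPLE:0000001", "term_name": "example term"},
    ]

    with open("store_style.json", "w") as fh:
        json.dump(store_style, fh, indent=2)
    with open("ontology_terms.json", "w") as fh:
        json.dump(list_style, fh, indent=2)

Running the script without --dbupdate (a flag that appears to come from the create_ff_arg_parser parent parser, since get_args does not add it) only reports how many items of each type would be loaded; with --dbupdate, new items are posted through the load_data endpoint in chunks and items that already exist are patched individually.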