Commit 7a77f9c3 authored by katharina.berger

add "retracted=false"

parent 6548f6c2
@@ -213,7 +213,7 @@ def get_list_of_new_mapfiles(log, project, project_config, scan_directory, outpu
# query local Solr for published replica of the data
log.info("\tChecking local Solr=%s for replica of dataset=%s" % (local_master_solr_url, instance_id))
-replica_query = 'instance_id:%s&replica:true' % instance_id
+replica_query = 'instance_id:%s&replica:true&retracted=false' % instance_id
fields = []
num_found = query_solr(log, replica_query, None, solr_url=local_master_solr_url, solr_core='datasets')
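For context, `retracted` is also a standard constraint on the public ESGF search service, so the same filter can be expressed over plain HTTP. The sketch below is illustrative only: it goes through the ESGF search REST endpoint rather than this script's `query_solr` helper, and the `requests` dependency and hostname are assumptions.

```python
import requests

def count_live_replicas(index_node, instance_id):
    """Count non-retracted replica records for one dataset instance.

    Sketch only: uses the ESGF search REST endpoint instead of a
    direct Solr query; 'index_node' and 'instance_id' are caller-supplied.
    """
    params = {
        'type': 'Dataset',
        'instance_id': instance_id,
        'replica': 'true',
        'retracted': 'false',  # the filter this commit adds to the Solr query
        'format': 'application/solr+json',
        'limit': 0,            # only the hit count is needed
    }
    r = requests.get('https://%s/esg-search/search' % index_node,
                     params=params, timeout=30)
    r.raise_for_status()
    return r.json()['response']['numFound']
```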
@@ -287,93 +287,6 @@ def check_consistency_replcia(log, config, project, input_directory, output_dire
datasets_incomplete_file.close()
-#drspat = project_config.translate('directory_format')
-#facets = project_config.get_facets('dataset_id')
-#dataset_ids = set()
-# # get a list of all datasets on filesystem
-# for root, _, files in os.walk(scan_directory):
-# if files:
-# kv = {}
-# f = files[0]
-# ffp = os.path.join(root, f)
-# res = re.search(drspat, ffp)
-# for key in facets:
-# kv[key] = res.group(key)
-# kv['version'] = os.path.basename(os.path.dirname(ffp))
-#
-# # exclude datasets published as original data on the DKRZ data node
-# if kv['institute'] not in EXCLUDE_INSTITUTES:
-# master_id = interpolate(project_config.get('dataset_id', raw=True), kv)
-# instance_id = '%s.%s' % (master_id, kv['version'])
-#
-# # query local Solr for published replica of the data
-# log.info("\tChecking local Solr=%s for replica of dataset=%s" % (local_master_solr_url, instance_id))
-#
-# replica_query = 'instance_id:%s&replica:true' % instance_id
-# fields = []
-# replica_docs = query_solr(log, replica_query, fields, solr_url=local_master_solr_url, solr_core='datasets')
-#
-# if not replica_docs:
-# dataset_ids.add(instance_id)
-# else:
-# log.info("\tReplica already published for dataset=%s" % instance_id)
-# log.info("Create new mapfiles in directory %s" % (output_mapdir))
-#
-# master_solr_dict, index_nodes = get_list_of_shards(log, esgf_index_node_url)
-#
-# # 1) query all remote index nodes for the latest primary datasets
-# fields = ['id', 'instance_id', 'version']
-# for index_node in index_nodes:
-#
-# try:
-# if index_node in master_solr_dict:
-# remote_slave_solr_url = 'http://localhost:%s/solr' % master_solr_dict[index_node]
-# else:
-# remote_slave_solr_url = 'https://%s/solr' % index_node
-# log.info("Querying Solr=%s for datasets with project=%s " % (remote_slave_solr_url, project))
-# query1 = ('project:%s&replica:false&latest:true' % project)
-# docs1 = query_solr(log,
-# query1,
-# fields,
-# solr_url=remote_slave_solr_url,
-# solr_core='datasets')
-# except:
-# log.error("Error querying index node %s" % remote_slave_solr_url)
-#
-# docs1 = []
-#
-# new_datasets_dict = {}
-#
-# # 2) query local index for replicas of the same datasets
-# # that are flagged with latest='true'
-# for doc1 in docs1:
-# instance_id = doc1['instance_id']
-# dataset_id = doc1['id']
-# log.info("\tChecking local Solr=%s for replica of dataset=%s" % (local_master_solr_url, instance_id))
-#
-# query2 = 'instance_id:%s&replica:true' % instance_id
-# docs2 = query_solr(log,
-# query2,
-# fields,
-# solr_url=local_master_solr_url,
-# solr_core='datasets')
-# if not docs2:
-# # get a list of file checksums
-# log.info("\tQuerying Solr=%s for list of files=%s" % (remote_slave_solr_url, instance_id))
-# fields_file = ['checksum', 'title']
-# query3 = 'dataset_id:%s' % dataset_id
-# docs3 = query_solr(log,
-# query3,
-# fields,
-# solr_url=remote_slave_solr_url,
-# solr_core='files')
-# for doc3 in docs3:
-# title = doc1['title']
-# checksum = doc1['checksum']
-#
def main(argv):
try:
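The block deleted above sketched a two-phase consistency check: query each remote index node for the latest primary (non-replica) datasets, then look up each one in the local index to see whether a replica is already published. Below is a condensed sketch of that flow, assuming a `query_solr(log, query, fields, solr_url=..., solr_core=...)` helper that behaves as in this script and returns a list of result dicts, with the non-retracted filter this commit introduces folded into the local lookup:

```python
def find_unreplicated_datasets(log, query_solr, index_nodes,
                               local_master_solr_url, project):
    """Return instance_ids of latest primary datasets with no local replica.

    Condensed from the commented-out block removed in this commit; the
    per-node shard handling and the file-checksum phase are omitted.
    """
    fields = ['id', 'instance_id', 'version']
    missing = set()
    for index_node in index_nodes:
        remote_slave_solr_url = 'https://%s/solr' % index_node
        # phase 1: latest primary (non-replica) datasets on the remote index
        try:
            docs = query_solr(log,
                              'project:%s&replica:false&latest:true' % project,
                              fields,
                              solr_url=remote_slave_solr_url,
                              solr_core='datasets')
        except Exception:
            log.error("Error querying index node %s" % remote_slave_solr_url)
            docs = []
        # phase 2: check the local index for a published, non-retracted replica
        for doc in docs:
            replicas = query_solr(log,
                                  'instance_id:%s&replica:true&retracted=false' % doc['instance_id'],
                                  fields,
                                  solr_url=local_master_solr_url,
                                  solr_core='datasets')
            if not replicas:
                missing.add(doc['instance_id'])
    return missing
```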
@@ -437,13 +350,6 @@ def main(argv):
print "Please use exactly one from ['--list-retracted-version', '--get-list-of-new-mapfiles', '--create-new-mapfiles-from-list', '--check-consistency']"
sys.exit(0)
-# if create_mapfiles:
-# if len(lastargs) != 0:
-# scan_directory = lastargs[0]
-# else:
-# print "Please specify a starting directory to scan."
-# sys.exit(0)
configfile = os.path.join(config_dir, config_filename)
if not os.path.isfile(configfile):
print "Missing configfile %s" % configfile
@@ -475,6 +381,5 @@ def main(argv):
elif check_mapfiles:
check_consistency_replcia(log, config, project, scan_directory, output_folder, recipients=recipients)
if __name__ == '__main__':
main(sys.argv[1:])
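As a footnote to the option handling above, `main` requires that exactly one operating mode is selected. A minimal sketch of that guard follows; the boolean parameter names are illustrative, since the option parsing itself is outside this diff:

```python
import sys

def require_single_mode(list_retracted, get_new_mapfiles,
                        create_from_list, check_consistency):
    # exactly one of the four mode flags must be set, mirroring the
    # usage message printed in main() (flag parsing not reproduced here)
    modes = [list_retracted, get_new_mapfiles, create_from_list, check_consistency]
    if modes.count(True) != 1:
        print("Please use exactly one from ['--list-retracted-version', "
              "'--get-list-of-new-mapfiles', '--create-new-mapfiles-from-list', "
              "'--check-consistency']")
        sys.exit(0)
```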