Commit 894b8b21 authored by katharina.berger's avatar katharina.berger

change unpublish

parent b636f637
@@ -196,64 +196,113 @@ def unpublish_retracted(log,
log.info("Checking replicas start datetime=%s stop datetime=%s "
"dry_run=%s" % (start_datetime, stop_datetime, dry_run))
    index_nodes, master_solr_dict = get_list_of_shards(log, config, esgf_index_node_url)
    # counter
    num_datasets_unpublished = 0
    # 1) query all remote index nodes for the latest primary datasets
    # that have changed in the given time period
    fields = ['id', 'master_id', 'version', '_timestamp']
    for index_node in index_nodes:
        try:
            if index_node in master_solr_dict:
                remote_slave_solr_url = 'http://esgf-data.dkrz.de:%s/solr' % master_solr_dict[index_node]
            else:
                remote_slave_solr_url = 'https://%s/solr' % index_node
            log.info("Querying Solr=%s for datasets with project=%s start_datetime=%s stop_datetime=%s"
                     % (remote_slave_solr_url, project, start_datetime, stop_datetime))
            if not start_datetime or not stop_datetime:
                query1 = ('project:%s&replica:false&retracted:true' % project)
            else:
                query1 = ('project:%s&replica:false&retracted:true&_timestamp:[%s TO %s]' % (project, start_datetime, stop_datetime))
            docs1 = query_solr(log,
                               query1,
                               fields,
                               solr_url=remote_slave_solr_url,
                               solr_core='datasets')
            if len(docs1) > 0:
                log.info("\tFound %s datasets that have been retracted, checking local Solr for replicas" % len(docs1))
        except:
            log.error("Error querying index node %s" % remote_slave_solr_url)
    # get a list of all published replicas from the local index
    # (set fields unconditionally so the queries below always return 'retracted')
    fields = ['instance_id', 'retracted']
    replica_list = set()
    query_replica = 'project:%s&replica:true' % project
    docs_replica = query_solr(log,
                              query_replica,
                              fields,
                              solr_url=local_master_solr_url,
                              solr_core='datasets')
    docs1 = []
    for doc in docs_replica:
        replica_list.add(doc['instance_id'])
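    # instance_id identifies a dataset version independently of the hosting node;
    # the full 'id' carries a '|<data_node>' suffix (see the split in the legacy code below)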
    # 2) query local index for replicas of the same datasets
    # note: docs1 was emptied above, so this legacy loop is effectively disabled
    esgunpublish_logfile = open(os.path.join(outdir, 'unpublish.log'), 'a')
    for doc1 in docs1:
        v1 = int(doc1['version'])
        dataset_id1 = doc1['id']
        instance_id = dataset_id1.split('|')[0]
        log.info("\tChecking local Solr=%s for replica of dataset=%s version=%s" % (local_master_solr_url, dataset_id1, v1))
        query2 = 'instance_id:%s&replica:true' % instance_id
        docs2 = query_solr(log,
                           query2,
                           fields,
                           solr_url=local_master_solr_url,
                           solr_core='datasets')
        # check local 'latest' replica
        for doc2 in docs2:
            dataset_id2 = doc2['id']
            # run esgunpublish
            run_esgunpublish(project, dataset_id2, esgunpublish_logfile, dry_run)
            num_datasets_unpublished += 1
    list_retracted = open(os.path.expanduser('~/unpublish_retracted_datasets_%s.txt' % datetime.datetime.today().strftime('%Y%m%d')), 'w')
    list_removed = open(os.path.expanduser('~/unpublish_deleted_datasets_%s.txt' % datetime.datetime.today().strftime('%Y%m%d')), 'w')
    # query all remote index nodes for the replicas published at DKRZ
    index_nodes, master_solr_dict = get_list_of_shards(log, config, esgf_index_node_url)
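    # for each locally published replica, look up the original (replica:false) record
    # on every remote index; when found, its 'retracted' flag decides which list it joins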
    for instance_id in replica_list:
        for index_node in index_nodes:
            try:
                if index_node in master_solr_dict:
                    remote_slave_solr_url = 'http://esgf-data.dkrz.de:%s/solr' % master_solr_dict[index_node]
                else:
                    remote_slave_solr_url = 'https://%s/solr' % index_node
                log.info("Querying Solr=%s for dataset %s" % (remote_slave_solr_url, instance_id))
                query_original_retracted = ('project:%s&replica:false&instance_id:%s' % (project, instance_id))
                docs1 = query_solr(log,
                                   query_original_retracted,
                                   fields,
                                   solr_url=remote_slave_solr_url,
                                   solr_core='datasets')
                if len(docs1) > 0:
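                    # rewrite '<dataset>.v20YYMMDD' as '<dataset>#20YYMMDD', presumably the
                    # dataset_id#version form the downstream unpublish step expects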
                    if docs1[0]['retracted']:
                        list_retracted.write('%s\n' % '#20'.join(instance_id.split('.v20')))
                    else:
                        list_removed.write('%s\n' % '#20'.join(instance_id.split('.v20')))
            except:
                log.error("Error querying index node %s" % remote_slave_solr_url)
    esgunpublish_logfile.close()
    list_retracted.close()
    list_removed.close()
    # index_nodes, master_solr_dict = get_list_of_shards(log, config, esgf_index_node_url)
    #
    # # counter
    # num_datasets_unpublished = 0
    #
    # # 1) query all remote index nodes for the latest primary datasets
    # # that have changed in the given time period
    # fields = ['id', 'master_id', 'version', '_timestamp']
    # for index_node in index_nodes:
    #
    #     try:
    #         if index_node in master_solr_dict:
    #             remote_slave_solr_url = 'http://esgf-data.dkrz.de:%s/solr' % master_solr_dict[index_node]
    #         else:
    #             remote_slave_solr_url = 'https://%s/solr' % index_node
    #         log.info("Querying Solr=%s for datasets with project=%s start_datetime=%s stop_datetime=%s"
    #                  % (remote_slave_solr_url, project, start_datetime, stop_datetime))
    #         if not start_datetime or not stop_datetime:
    #             query1 = ('project:%s&replica:false&retracted:true' % project)
    #         else:
    #             query1 = ('project:%s&replica:false&retracted:true&_timestamp:[%s TO %s]' % (project, start_datetime, stop_datetime))
    #         docs1 = query_solr(log,
    #                            query1,
    #                            fields,
    #                            solr_url=remote_slave_solr_url,
    #                            solr_core='datasets')
    #         if len(docs1) > 0:
    #             log.info("\tFound %s datasets that have been retracted, checking local Solr for replicas" % len(docs1))
    #
    #     except:
    #         log.error("Error querying index node %s" % remote_slave_solr_url)
    #
    # docs1 = []
    #
    # # 2) query local index for replicas of the same datasets
    # for doc1 in docs1:
    #     v1 = int(doc1['version'])
    #     dataset_id1 = doc1['id']
    #     instance_id = dataset_id1.split('|')[0]
    #     log.info("\tChecking local Solr=%s for replica of dataset=%s version=%s" % (local_master_solr_url, dataset_id1, v1))
    #
    #     query2 = 'instance_id:%s&replica:true' % instance_id
    #     docs2 = query_solr(log,
    #                        query2,
    #                        fields,
    #                        solr_url=local_master_solr_url,
    #                        solr_core='datasets')
    #
    #     esgunpublish_logfile = open(os.path.join(outdir, 'unpublish.log'), 'a')
    #
    #     # check local 'latest' replica
    #     for doc2 in docs2:
    #         dataset_id2 = doc2['id']
    #         # run esgunpublish
    #         run_esgunpublish(project, dataset_id2, esgunpublish_logfile, dry_run)
    #         num_datasets_unpublished += 1
    #
    # esgunpublish_logfile.close()
log.info("Total number of local replica unpublished=%s" % num_datasets_unpublished)
log.close()
@@ -394,8 +443,7 @@ def main(argv):
    check_mapfiles = False
    get_list_of_mapfiles = False
    output_folder = '/tmp'
    unpublish_retracted = False
    do_unpublication_retracted = False
    for flag, arg in args:
        if flag == '--list-retracted-version':
@@ -425,14 +473,14 @@ def main(argv):
            logfile_path = arg
        elif flag in ['-o', '--output']:
            output_folder = arg
        elif flag == 'unpublish-retracted':
            unpublish_retracted = True
        elif flag == '--unpublish-retracted':
            do_unpublication_retracted = True
    if not project:
        print usage
        sys.exit(0)
    if list_retracted == create_maps == get_list_of_mapfiles == check_mapfiles == unpublish_retracted:
    if list_retracted == create_maps == get_list_of_mapfiles == check_mapfiles == do_unpublication_retracted:
        print "Please use exactly one from ['--list-retracted-version', '--get-list-of-new-mapfiles', '--create-new-mapfiles', '--check-consistency', '--unpublish-retracted']"
        sys.exit(0)
@@ -457,8 +505,12 @@ def main(argv):
    if list_retracted:
        check_replicas(log, config, project, start_datetime=start_datetime, stop_datetime=stop_datetime, dry_run=dry_run, recipients=recipients)
    elif unpublish_retracted:
        unpublish_retracted(log, config, project, output_folder, start_datetime=start_datetime, stop_datetime=stop_datetime, dry_run=dry_run, recipients=recipients)
    elif do_unpublication_retracted:
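        # pass the time window only when both bounds are given, otherwise
        # fall back to the defaults of unpublish_retracted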
        if start_datetime and stop_datetime:
            unpublish_retracted(log, config, project, output_folder, start_datetime=start_datetime, stop_datetime=stop_datetime, dry_run=dry_run, recipients=recipients)
        else:
            unpublish_retracted(log, config, project, output_folder, dry_run=dry_run, recipients=recipients)
    elif get_list_of_mapfiles:
        project_config = SectionParser(section='project:%s' % project.lower(), directory=config_dir)
        output_file = open(os.path.join(output_folder, 'new_replica.lst'), 'w')
......