package org.aksw.gerbil.dataset.datahub;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.aksw.gerbil.config.GerbilConfiguration;
import org.aksw.gerbil.dataset.datahub.model.Dataset;
import org.aksw.gerbil.dataset.datahub.model.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.client.RestTemplate;

/* loaded from: input_file:org/aksw/gerbil/dataset/datahub/DatahubNIFLoader.class */
/**
 * Looks up datasets on Datahub that carry all configured tags and records, for each of them, the
 * URL of a Turtle resource that is small enough to be used.
 *
 * <p>All work happens during construction: the configured tags are resolved to dataset names,
 * the meta information of every such dataset is requested, and each suitable resource URL is
 * stored in the map returned by {@link #getDataSets()}. Remote failures are logged and the
 * affected tag, dataset or resource is skipped, so construction does not fail because a single
 * request failed.
 */
public class DatahubNIFLoader {
    private static final Logger LOGGER = LoggerFactory.getLogger(DatahubNIFLoader.class);
    private static final String DATAHUB_NIF_CORPUS_META_INF_URL_PROPERTY_NAME = "org.aksw.gerbil.datasets.DatahubNIFLoader.metaInfURL";
    private static final String DATAHUB_TAG_INF_URL_PROPERTY_NAME = "org.aksw.gerbil.datasets.DatahubNIFLoader.tagInfURL";
    private static final String DATAHUB_NEEDED_TAGS_ARRAY_PROPERTY_NAME = "org.aksw.gerbil.datasets.DatahubNIFLoader.corpusTags";

    /** Resources whose reported content length is not below this number of bytes are ignored. */
    private static final long MAX_RESOURCE_SIZE_BYTES = 20000000L;
    /** Suffix a resource URL must have to be accepted. */
    private static final String TURTLE_FILE_SUFFIX = ".ttl";
    /** Suffix of resource URLs that are skipped although they end with the Turtle suffix. */
    private static final String DATAID_FILE_SUFFIX = "dataid.ttl";

    private final RestTemplate rt = new RestTemplate();
    private String[] neededTags;
    private Map<String, String> datasets;

    /**
     * Creates the loader and immediately queries Datahub using the URLs and tags found in the
     * {@link GerbilConfiguration}.
     */
    public DatahubNIFLoader() {
        init();
    }

    /**
     * Reads the needed tags from the configuration (falling back to no tags if the property is
     * missing) and fills {@link #datasets}.
     */
    private void init() {
        this.neededTags = GerbilConfiguration.getInstance().getStringArray(DATAHUB_NEEDED_TAGS_ARRAY_PROPERTY_NAME);
        if (this.neededTags == null) {
            LOGGER.error("Couldn't load the needed property \"{}\".", DATAHUB_NEEDED_TAGS_ARRAY_PROPERTY_NAME);
            this.neededTags = new String[0];
        }
        getNIFDataSetsMetaInformation(getNIFDataSets());
    }

    /**
     * Requests the meta information of every given dataset and stores the URL of each suitable
     * resource in {@link #datasets}, keyed by the dataset name. If a dataset offers several
     * suitable resources, the one listed last wins because the map holds one URL per dataset.
     *
     * @param list names of the datasets whose meta information should be requested
     */
    private void getNIFDataSetsMetaInformation(List<String> list) {
        this.datasets = Maps.newHashMap();
        String metaInfUrl = GerbilConfiguration.getInstance().getString(DATAHUB_NIF_CORPUS_META_INF_URL_PROPERTY_NAME);
        if (metaInfUrl == null) {
            LOGGER.error("Couldn't load the needed property \"{}\". Aborting.", DATAHUB_NIF_CORPUS_META_INF_URL_PROPERTY_NAME);
            return;
        }
        for (String datasetName : list) {
            try {
                ResponseEntity<Dataset.Response> entity = this.rt.getForEntity(metaInfUrl + datasetName,
                        Dataset.Response.class);
                if (!entity.getStatusCode().equals(HttpStatus.OK)) {
                    continue;
                }
                Dataset.Response body = entity.getBody();
                if (body == null || body.getResult() == null || body.getResult().getResources() == null) {
                    LOGGER.warn("Got no resources for the dataset {} from DataHubIO.", datasetName);
                    continue;
                }
                for (Resource resource : body.getResult().getResources()) {
                    addResourceIfSuitable(datasetName, resource.getUrl());
                }
            } catch (Exception e) {
                // Best effort, like the tag lookup: a failing dataset must not stop the others.
                LOGGER.warn("Couldn't get the meta information of the dataset {} from DataHubIO.", datasetName, e);
            }
        }
    }

    /**
     * Stores the given resource URL for the dataset if it is a Turtle file, is not a dataid file
     * and its reported content length is below {@link #MAX_RESOURCE_SIZE_BYTES}.
     *
     * @param datasetName name of the dataset the resource belongs to
     * @param url URL of the resource, may be {@code null}
     */
    private void addResourceIfSuitable(String datasetName, String url) {
        // Cheap string checks first, so no HEAD request is sent for resources that are rejected anyway.
        if (url == null || !url.endsWith(TURTLE_FILE_SUFFIX) || url.endsWith(DATAID_FILE_SUFFIX)) {
            return;
        }
        try {
            LOGGER.debug("checking {}", url);
            // NOTE: Spring's HttpHeaders reports -1 if the header is missing, so resources of
            // unknown size pass the check below.
            long contentLength = this.rt.headForHeaders(url).getContentLength();
            LOGGER.debug("{} bytes", contentLength);
            if (contentLength < MAX_RESOURCE_SIZE_BYTES) {
                LOGGER.debug("{}: {} has less than 20mb and is turtle > add to Dataset", datasetName, url);
                this.datasets.put(datasetName, url);
            }
        } catch (Exception e) {
            // A single unreachable resource must not prevent the remaining ones from being checked.
            LOGGER.warn("Couldn't check the resource {} of the dataset {}.", url, datasetName, e);
        }
    }

    /**
     * Determines the names of the datasets that carry every needed tag. Tags for which the
     * request fails are logged and ignored, i.e. they do not restrict the result.
     *
     * @return the names of the datasets found for all successfully requested tags; empty if the
     *         tag URL is not configured, no tags are configured or no request succeeded
     */
    private List<String> getNIFDataSets() {
        List<String> result = Lists.newArrayList();
        String tagInfUrl = GerbilConfiguration.getInstance().getString(DATAHUB_TAG_INF_URL_PROPERTY_NAME);
        if (tagInfUrl == null) {
            LOGGER.error("Couldn't load the needed property \"{}\". Aborting.", DATAHUB_TAG_INF_URL_PROPERTY_NAME);
            return result;
        }
        // Stays null until the first tag has been answered successfully.
        HashSet<String> commonDatasets = null;
        for (String tag : this.neededTags) {
            try {
                ResponseEntity<String[]> entity = this.rt.getForEntity(tagInfUrl + tag, String[].class);
                if (!entity.getStatusCode().equals(HttpStatus.OK)) {
                    LOGGER.warn("Couldn't get any datasets with the {} tag from DataHubIO. Status: {}", tag,
                            entity.getStatusCode());
                    continue;
                }
                String[] body = entity.getBody();
                if (body == null) {
                    LOGGER.warn("Couldn't get any datasets with the {} tag from DataHubIO. The response had no body.",
                            tag);
                    continue;
                }
                HashSet<String> taggedDatasets = Sets.newHashSet(body);
                LOGGER.debug("corpora with \"{}\" tag {}", tag, taggedDatasets);
                if (commonDatasets == null) {
                    commonDatasets = taggedDatasets;
                } else {
                    // In-place intersection with the datasets of the current tag.
                    commonDatasets.retainAll(taggedDatasets);
                }
            } catch (Exception e) {
                // The exception is passed as the last argument so that SLF4J logs its stack trace.
                LOGGER.warn("Couldn't get any datasets with the {} tag from DataHubIO.", tag, e);
            }
        }
        if (commonDatasets != null) {
            result.addAll(commonDatasets);
        }
        return result;
    }

    /**
     * Creates a loader and logs every dataset it found on debug level.
     *
     * @param strArr command line arguments (ignored)
     */
    public static void main(String[] strArr) {
        for (Map.Entry<String, String> entry : new DatahubNIFLoader().getDataSets().entrySet()) {
            LOGGER.debug("{}: {}", entry.getKey(), entry.getValue());
        }
    }

    /**
     * Returns the datasets found during construction.
     *
     * @return the internal (live, mutable) map from dataset name to the URL of its Turtle resource
     */
    public Map<String, String> getDataSets() {
        return this.datasets;
    }
}
