/*
 * Decompiled with CFR 0.152.
 */
package org.netpreserve.jwarc.tools;

import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.channels.spi.AbstractInterruptibleChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.atomic.AtomicLong;
import org.netpreserve.jwarc.WarcCaptureRecord;
import org.netpreserve.jwarc.WarcDigest;
import org.netpreserve.jwarc.WarcPayload;
import org.netpreserve.jwarc.WarcReader;
import org.netpreserve.jwarc.WarcRecord;
import org.netpreserve.jwarc.WarcResponse;
import org.netpreserve.jwarc.WarcRevisit;
import org.netpreserve.jwarc.WarcWriter;
import org.netpreserve.jwarc.cdx.CdxReader;
import org.netpreserve.jwarc.cdx.CdxRecord;

public class DedupeTool {
    // Responses whose payload body is smaller than this many bytes are never de-duplicated.
    private long minimumSize = 256L;
    // Base URL of the CDX server to query for earlier captures; null means no CDX lookups are made.
    private String cdxServer;
    // When true, one line is printed per record saying whether it is copied or replaced.
    private boolean verbose;
    // When true, no output file is opened or written; only the statistics are computed.
    private boolean dryRun;
    // When true, the per-file summary line and failure stack traces are not printed.
    private boolean quiet;
    // Parallelism of the ForkJoinPool created in run(); defaults to the number of available processors.
    private int threads = Runtime.getRuntime().availableProcessors();
    // Payload digest -> capture to refer to; null when caching is disabled (see setCacheSize).
    private Map<WarcDigest, CacheValue> digestCache;
    // Number of input files that failed with an IOException; a non-zero count makes run() exit with status 1.
    private final AtomicLong errors = new AtomicLong();

    /**
     * Reads every record of {@code infile} and either copies its bytes verbatim to
     * {@code outfile} or, when {@link #deduplicate(WarcRecord)} returns a revisit for it,
     * writes that revisit record instead. Unless {@code quiet} is set, a one-line summary
     * of the record counts and byte savings is printed afterwards.
     *
     * <p>In dry-run mode no output file is opened: nothing is copied, and revisit records
     * are written to a discarding channel purely to measure their size.
     *
     * @param infile  the WARC file to read
     * @param outfile the file to create or truncate; may be {@code null} in dry-run mode
     * @throws IOException if reading the input or writing the output fails
     */
    public void deduplicateWarcFile(Path infile, Path outfile) throws IOException {
        long totalRecords = 0L;
        long deduplicatedRecords = 0L;
        long totalSize = 0L;
        long savedSize = 0L;
        try (FileChannel input = FileChannel.open(infile);
             WarcReader reader = new WarcReader(input);
             FileChannel output = this.dryRun ? null : FileChannel.open(outfile, StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
            // The writer is created lazily (on the first revisit), so it cannot be a
            // try-with-resources variable; it is closed explicitly in the finally block below.
            WarcWriter writer = null;
            try {
                WarcRecord record = reader.next().orElse(null);
                while (record != null) {
                    long position = reader.position();
                    // Must run before reader.next(): the decision is made while this record is current.
                    WarcRevisit revisit = this.deduplicate(record);
                    record = reader.next().orElse(null);
                    // The reader position is sampled before and after advancing; the difference
                    // is used as the number of input bytes occupied by the record just examined.
                    long length = reader.position() - position;
                    ++totalRecords;
                    totalSize += length;
                    if (revisit == null) {
                        if (this.verbose) {
                            System.out.println((this.dryRun ? "Would copy " : "Copying ") + position + ":" + length);
                        }
                        if (!this.dryRun) {
                            DedupeTool.transferExactly(input, position, length, output);
                        }
                        continue;
                    }
                    if (this.verbose) {
                        System.out.println((this.dryRun ? "Would write" : "Writing") + " revisit for " + position + ":" + length);
                    }
                    ++deduplicatedRecords;
                    if (writer == null) {
                        writer = this.dryRun
                                ? new WarcWriter(new NullWritableByteChannel(), reader.compression())
                                : new WarcWriter(output, reader.compression());
                    }
                    long beforePosition = writer.position();
                    writer.write(revisit);
                    long revisitSize = writer.position() - beforePosition;
                    // May be negative when the revisit record is larger than the record it replaces.
                    savedSize += length - revisitSize;
                }
            } finally {
                if (writer != null) {
                    writer.close();
                }
            }
        }
        if (!this.quiet) {
            double percentage = totalSize > 0L ? (double)savedSize / (double)totalSize * 100.0 : 0.0;
            String action = this.dryRun ? "would dedupe" : "deduped";
            System.out.printf("%s: %s %d/%d records, saving %s/%s (%.2f%%)%n", outfile != null ? outfile.getFileName() : infile.getFileName(), action, deduplicatedRecords, totalRecords, this.formatBytes(savedSize), this.formatBytes(totalSize), percentage);
        }
    }

    /**
     * Formats a byte count using binary units (1KB = 1024B), with two decimals for KB and above.
     *
     * <p>The unit is chosen from the magnitude of {@code bytes}, so a negative count
     * (a net size increase) is scaled the same way as the corresponding positive count
     * instead of always being printed in raw bytes.
     *
     * @param bytes the byte count; may be negative
     * @return the formatted size, e.g. {@code 512B} or a two-decimal value followed by the unit
     */
    private String formatBytes(long bytes) {
        // Math.abs(Long.MIN_VALUE) is still negative, so clamp that one value explicitly.
        long magnitude = bytes == Long.MIN_VALUE ? Long.MAX_VALUE : Math.abs(bytes);
        if (magnitude < 1024L) {
            return bytes + "B";
        }
        if (magnitude < 0x100000L) {
            return String.format("%.2fKB", (double)bytes / 1024.0);
        }
        if (magnitude < 0x40000000L) {
            return String.format("%.2fMB", (double)bytes / 1048576.0);
        }
        return String.format("%.2f GB", (double)bytes / 1.073741824E9);
    }

    /**
     * Copies exactly {@code length} bytes from {@code input}, starting at {@code position},
     * into {@code output}, calling {@link FileChannel#transferTo} repeatedly until done.
     *
     * @param input    channel to read from
     * @param position offset in {@code input} of the first byte to copy
     * @param length   number of bytes to copy
     * @param output   channel to write to
     * @throws IOException if a transfer step moves no bytes, if the final count does not
     *                     equal {@code length}, or if the underlying transfer fails
     */
    private static void transferExactly(FileChannel input, long position, long length, FileChannel output) throws IOException {
        long done = 0L;
        while (done < length) {
            long count = input.transferTo(position + done, length - done, output);
            // A non-positive count means no progress was made; fail rather than loop forever.
            if (count <= 0L) {
                throw new IOException("FileChannel.transferTo returned " + count);
            }
            done += count;
        }
        if (done != length) {
            throw new IOException("Expected to transfer " + length + " but actually transferred " + done);
        }
    }

    /**
     * Decides whether {@code record} duplicates an earlier capture and, if so, builds the
     * revisit record that should replace it.
     *
     * <p>Only {@link WarcResponse} records that have a payload of at least
     * {@code minimumSize} bytes and a payload digest are considered. The digest cache (if
     * enabled) is consulted first, then the CDX server (if configured). When neither
     * matches, the record is remembered in the cache so later records can refer to it.
     *
     * @param record the record to examine
     * @return a revisit record to write instead of {@code record}, or {@code null} if the
     *         record should be copied unchanged
     * @throws IOException if reading the record or querying the CDX server fails
     */
    private WarcRevisit deduplicate(WarcRecord record) throws IOException {
        if (!(record instanceof WarcResponse)) {
            return null;
        }
        WarcResponse response = (WarcResponse)record;

        WarcPayload payload = response.payload().orElse(null);
        if (payload == null) {
            return null;
        }
        if (payload.body().size() < this.minimumSize) {
            return null;
        }

        WarcDigest payloadDigest = response.payloadDigest().orElse(null);
        if (payloadDigest == null) {
            return null;
        }

        Map<WarcDigest, CacheValue> cache = this.digestCache;
        if (cache != null) {
            CacheValue hit = cache.get(payloadDigest);
            if (hit != null) {
                return this.newRevisit(response, hit.id, hit.targetUri, hit.date);
            }
        }

        if (this.cdxServer != null) {
            CdxRecord match = this.findMatchingCdxRecord(response, payloadDigest.base32());
            if (match != null) {
                if (cache != null) {
                    // A CDX match supplies no record id, so the cached entry has a null id.
                    cache.put(payloadDigest, new CacheValue(null, match.target(), match.date()));
                }
                return this.newRevisit(response, null, match.target(), match.date());
            }
        }

        if (cache != null) {
            // First sighting of this digest: remember this response as the one to refer to.
            cache.put(payloadDigest, new CacheValue(response.id(), response.target(), response.date()));
        }
        return null;
    }

    /**
     * Builds a revisit record for {@code response} that points at an earlier capture.
     *
     * <p>The revisit keeps the response's target, date, content type, serialized HTTP
     * header and payload digest, and uses the identical-payload-digest profile.
     *
     * @param response       the duplicate response being replaced
     * @param refersTo       id of the earlier record; callers pass {@code null} when it is unknown
     * @param refersToTarget target URI of the earlier capture
     * @param refersToDate   date of the earlier capture
     * @return the new revisit record
     * @throws AssertionError if {@code response} has no payload digest
     */
    private WarcRevisit newRevisit(WarcResponse response, URI refersTo, String refersToTarget, Instant refersToDate) throws IOException {
        WarcRevisit.Builder builder = new WarcRevisit.Builder(response.target(), WarcRevisit.IDENTICAL_PAYLOAD_DIGEST_1_0);
        // Each step uses the builder returned by the previous call, exactly as a fluent chain would.
        builder = (WarcRevisit.Builder)builder.date(response.date());
        builder = (WarcRevisit.Builder)builder.refersTo(refersTo, refersToTarget, refersToDate);
        builder = (WarcRevisit.Builder)builder.body(response.contentType(), response.http().serializeHeader());
        builder = (WarcRevisit.Builder)builder.payloadDigest(response.payloadDigest().orElseThrow(AssertionError::new));
        return builder.build();
    }

    /**
     * Queries the configured CDX server for captures of {@code capture}'s target URI and
     * returns the first returned record whose digest equals {@code digest}, ignoring case.
     *
     * <p>The query asks for an exact URL match, reverse sort order and at most 10 rows.
     *
     * @param capture the capture whose target URI is looked up
     * @param digest  the digest string to compare against each returned record
     * @return the first matching CDX record, or {@code null} if none matches
     * @throws IOException if the query URL is malformed or the request fails
     */
    private CdxRecord findMatchingCdxRecord(WarcCaptureRecord capture, String digest) throws IOException {
        String encodedTarget = URLEncoder.encode(capture.target(), StandardCharsets.UTF_8.name());
        URL queryUrl = new URL(this.cdxServer + "?sort=reverse&rows=10&matchType=exact&url=" + encodedTarget);
        try (CdxReader results = new CdxReader(queryUrl.openStream())) {
            for (CdxRecord candidate : results) {
                if (digest.equalsIgnoreCase(candidate.digest())) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /**
     * Sets the CDX server URL that is queried for earlier captures.
     *
     * @param cdxServer the server URL, to which a query string is appended as-is;
     *                  {@code null} disables CDX lookups
     */
    public void setCdxServer(String cdxServer) {
        this.cdxServer = cdxServer;
    }

    /**
     * Derives the output path for a de-duplicated copy of {@code infile}.
     *
     * <p>If the file name ends with {@code .warc.gz}, {@code .warc}, {@code .arc.gz} or
     * {@code .arc} (checked in that order), {@code -dedup} is inserted before that suffix;
     * otherwise {@code .dedup} is appended. The result is placed in the input's parent
     * directory, or in {@code .} when the input has no parent.
     *
     * @param infile the input file path
     * @return the path of the output file
     */
    public static Path determineOutputPath(Path infile) {
        String name = infile.getFileName().toString();
        Path parent = infile.getParent();
        Path directory = parent != null ? parent : Paths.get(".");

        // Fallback for unrecognised extensions; replaced below when a known suffix matches.
        String outputName = name + ".dedup";
        for (String extension : new String[]{".warc.gz", ".warc", ".arc.gz", ".arc"}) {
            if (name.endsWith(extension)) {
                String stem = name.substring(0, name.length() - extension.length());
                outputName = stem + "-dedup" + extension;
                break;
            }
        }
        return directory.resolve(outputName);
    }

    /**
     * Command-line entry point: parses the options, collects the input files and runs the
     * de-duplication over them.
     *
     * <p>Any argument that does not start with {@code -} is treated as an input file.
     * An unrecognised option, an option that is missing its value, or a non-numeric value
     * for a numeric option prints a message to standard error and exits with status 1.
     *
     * @param args the command-line arguments
     * @throws IOException declared for callers; failures of individual files are reported
     *                     by the per-file handler rather than thrown
     */
    public static void main(String[] args) throws IOException {
        DedupeTool dedupeTool = new DedupeTool();
        List<Path> infiles = new ArrayList<>();
        try {
            for (int i = 0; i < args.length; ++i) {
                String arg = args[i];
                if (!arg.startsWith("-")) {
                    infiles.add(Paths.get(arg));
                    continue;
                }
                switch (arg) {
                    case "--cache-size": {
                        dedupeTool.setCacheSize(Integer.parseInt(DedupeTool.optionValue(args, ++i)));
                        break;
                    }
                    case "--cdx-server": {
                        dedupeTool.setCdxServer(DedupeTool.optionValue(args, ++i));
                        break;
                    }
                    case "--minimum-size": {
                        dedupeTool.setMinimumSize(Long.parseLong(DedupeTool.optionValue(args, ++i)));
                        break;
                    }
                    case "-j":
                    case "--threads": {
                        dedupeTool.setThreads(Integer.parseInt(DedupeTool.optionValue(args, ++i)));
                        break;
                    }
                    case "-h":
                    case "--help": {
                        // The defaults shown reflect any options parsed before --help.
                        System.out.println("Usage: jwarc dedupe [options] [warc-files...]");
                        System.out.println();
                        System.out.println("Options:");
                        System.out.println("      --cache-size N        Cache N digests for de-duplication (enables cross-URI de-duplication)");
                        System.out.println("      --cdx-server URL      De-duplicate against a remote CDX server");
                        System.out.println("      --minimum-size BYTES  Minimum payload size to consider de-duplicating (default " + dedupeTool.minimumSize + ")");
                        System.out.println("  -j, --threads N           Number of threads for parallel processing (default " + dedupeTool.threads + ")");
                        System.out.println("  -n, --dry-run             Don't write output, just calculate and print deduplication statistics");
                        System.out.println("  -q, --quiet               Don't print deduplication statistics");
                        System.out.println("  -v, --verbose             Verbose output");
                        return;
                    }
                    case "-v":
                    case "--verbose": {
                        dedupeTool.setVerbose(true);
                        break;
                    }
                    case "-n":
                    case "--dry-run": {
                        dedupeTool.setDryRun(true);
                        break;
                    }
                    case "-q":
                    case "--quiet": {
                        dedupeTool.setQuiet(true);
                        break;
                    }
                    default: {
                        System.err.println("Unrecognized option: " + arg);
                        System.err.println("Try `jwarc dedupe --help` for usage information");
                        System.exit(1);
                        return;
                    }
                }
            }
        } catch (NumberFormatException e) {
            // Only Integer.parseInt / Long.parseLong above can raise this.
            System.err.println("Invalid numeric option value: " + e.getMessage());
            System.err.println("Try `jwarc dedupe --help` for usage information");
            System.exit(1);
            return;
        }
        dedupeTool.run(infiles);
    }

    /**
     * Returns the value following an option, exiting with a usage error when the option
     * was the last argument on the command line.
     *
     * @param args       the full argument array
     * @param valueIndex index at which the option's value is expected (option index + 1)
     * @return the option's value
     */
    private static String optionValue(String[] args, int valueIndex) {
        if (valueIndex >= args.length) {
            System.err.println("Option " + args[valueIndex - 1] + " requires an argument");
            System.err.println("Try `jwarc dedupe --help` for usage information");
            System.exit(1);
        }
        return args[valueIndex];
    }

    /**
     * De-duplicates all {@code infiles} in parallel and exits the JVM with status 1 if
     * any file failed or the run was interrupted.
     *
     * <p>The parallel stream is started from a task submitted to a dedicated
     * {@link ForkJoinPool} sized by {@code threads}; presumably this is done so the
     * stream's work runs on that pool rather than on the common pool.
     *
     * @param infiles the input files to process
     * @throws IOException declared for callers; per-file I/O failures are counted in
     *                     {@code errors} by the per-file handler instead of being thrown
     */
    private void run(List<Path> infiles) throws IOException {
        ForkJoinPool pool = new ForkJoinPool(this.threads);
        try {
            pool.submit(() -> infiles.parallelStream().forEach(this::deduplicateWarcFile)).get();
        }
        catch (InterruptedException e) {
            // Restore the interrupt flag for anything that runs during JVM shutdown.
            Thread.currentThread().interrupt();
            System.err.println("Interrupted while de-duplicating");
            System.exit(1);
        }
        catch (ExecutionException e) {
            // An unchecked failure escaped a worker; report it instead of exiting silently.
            Throwable cause = e.getCause() != null ? e.getCause() : e;
            System.err.println("De-duplication failed: " + cause);
            if (!this.quiet) {
                cause.printStackTrace(System.err);
            }
            System.exit(1);
        }
        finally {
            pool.shutdown();
        }
        if (this.errors.get() > 0L) {
            System.exit(1);
        }
    }

    /**
     * De-duplicates a single input file, choosing the output path automatically (no
     * output path in dry-run mode).
     *
     * <p>An {@link IOException} is not propagated: it is reported on standard error, any
     * output file is deleted, the stack trace is printed unless {@code quiet} is set, and
     * the {@code errors} counter is incremented.
     *
     * @param infile the WARC file to process
     */
    private void deduplicateWarcFile(Path infile) {
        Path outfile = null;
        if (!this.dryRun) {
            outfile = DedupeTool.determineOutputPath(infile);
        }
        try {
            this.deduplicateWarcFile(infile, outfile);
            return;
        }
        catch (IOException failure) {
            System.err.println("Failed to dedupe " + infile + ": " + failure);
            if (outfile != null) {
                // Remove the output file so a failed run leaves no incomplete result behind.
                try {
                    Files.deleteIfExists(outfile);
                }
                catch (IOException deleteFailure) {
                    System.err.println("Failed to delete " + outfile + ": " + deleteFailure);
                }
            }
            if (!this.quiet) {
                failure.printStackTrace(System.err);
            }
            this.errors.incrementAndGet();
        }
    }

    /**
     * Enables or disables the in-memory digest cache.
     *
     * @param cacheSize maximum number of digests to remember; a value of zero or less
     *                  disables the cache
     */
    public void setCacheSize(int cacheSize) {
        if (cacheSize > 0) {
            // Wrapped in a synchronized map because input files are processed in parallel.
            this.digestCache = Collections.synchronizedMap(new LruCache<WarcDigest, CacheValue>(cacheSize));
        } else {
            this.digestCache = null;
        }
    }

    /**
     * Sets the smallest payload body size that is considered for de-duplication.
     *
     * @param minimumSize threshold in bytes; payloads smaller than this are never de-duplicated
     */
    public void setMinimumSize(long minimumSize) {
        this.minimumSize = minimumSize;
    }

    /**
     * Enables or disables per-record progress output.
     *
     * @param verbose {@code true} to print a line for every record copied or replaced
     */
    public void setVerbose(boolean verbose) {
        this.verbose = verbose;
    }

    /**
     * Enables or disables dry-run mode.
     *
     * @param dryRun {@code true} to compute statistics without writing any output file
     */
    public void setDryRun(boolean dryRun) {
        this.dryRun = dryRun;
    }

    /**
     * Enables or disables quiet mode.
     *
     * @param quiet {@code true} to suppress the per-file summary line and failure stack traces
     */
    public void setQuiet(boolean quiet) {
        this.quiet = quiet;
    }

    /**
     * Sets how many threads are used to process input files in parallel.
     *
     * @param threads the requested thread count; values below 1 are raised to 1
     */
    public void setThreads(int threads) {
        this.threads = threads < 1 ? 1 : threads;
    }

    /**
     * A channel that discards everything written to it while reporting every byte as
     * written. Used in dry-run mode so the size of a revisit record can be measured
     * without producing output.
     */
    private static class NullWritableByteChannel
    implements WritableByteChannel {
        private boolean closed;

        private NullWritableByteChannel() {
        }

        /**
         * Consumes all remaining bytes of {@code src} without storing them.
         *
         * @param src the buffer to drain; its position is advanced to its limit
         * @return the number of bytes that were remaining in {@code src}
         */
        @Override
        public int write(ByteBuffer src) {
            int end = src.limit();
            int discarded = end - src.position();
            src.position(end);
            return discarded;
        }

        @Override
        public boolean isOpen() {
            return !this.closed;
        }

        @Override
        public void close() {
            this.closed = true;
        }
    }

    /**
     * Immutable description of an earlier capture that a revisit record can refer to.
     * Stored in the digest cache, keyed by payload digest.
     */
    private static class CacheValue {
        // Record id of the earlier capture; null when the entry came from a CDX lookup.
        final URI id;
        // Target URI of the earlier capture.
        final String targetUri;
        // Date of the earlier capture.
        final Instant date;

        private CacheValue(URI id, String targetUri, Instant date) {
            this.id = id;
            this.targetUri = targetUri;
            this.date = date;
        }
    }

    /**
     * A size-bounded map that evicts its least recently accessed entry once an insertion
     * pushes it past its maximum size. Built on an access-ordered {@link LinkedHashMap}.
     *
     * @param <K> key type
     * @param <V> value type
     */
    private static class LruCache<K, V>
    extends LinkedHashMap<K, V> {
        private static final float LOAD_FACTOR = 0.75f;
        // true selects access order (rather than insertion order) in LinkedHashMap.
        private static final boolean ACCESS_ORDER = true;

        private final int capacity;

        /**
         * @param maxSize the maximum number of entries to keep; also used as the initial capacity
         */
        public LruCache(int maxSize) {
            super(maxSize, LOAD_FACTOR, ACCESS_ORDER);
            this.capacity = maxSize;
        }

        /** Tells the map to drop its eldest entry whenever the size exceeds the maximum. */
        @Override
        protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
            return this.capacity < this.size();
        }
    }
}

