package juicebox.tools.utils.juicer.hiccups;

import com.google.common.primitives.Doubles;
import com.google.common.primitives.Floats;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;
import jcuda.Pointer;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.JCudaDriver;
import jcuda.utils.KernelLauncher;
import juicebox.data.HiCFileTools;
import juicebox.data.MatrixZoomData;
import juicebox.tools.clt.juicer.HiCCUPSRegionHandler;
import juicebox.tools.utils.common.ArrayTools;
import juicebox.tools.utils.common.MatrixTools;
import juicebox.windowui.HiCZoom;
import juicebox.windowui.NormalizationType;
import org.apache.commons.io.IOUtils;
import org.apache.commons.math.linear.RealMatrix;

/* loaded from: input_file:juicebox/tools/utils/juicer/hiccups/GPUController.class */
/**
 * Runs the HiCCUPS per-pixel computation for one region, either by launching the
 * {@code BasicPeakCallingKernel} CUDA kernel through JCuda or by executing the pure-Java
 * loop in {@link #runCPUVersion}. Which path is taken is fixed at construction time.
 *
 * <p>Both paths return a {@code GPUOutputContainer} built from ten matrices in the same
 * positional order.
 */
public class GPUController {
    /** Edge length of the square thread block configured on the kernel launcher. */
    private static final int blockSize = 16;
    /** Bytes per {@code float}; used for device allocations and device-to-host copy sizes. */
    private static final int FLOAT_SIZE_BYTES = 4;
    private final KernelLauncher kernelLauncher;
    private final boolean useCPUVersionHiCCUPS;
    private final int windowCPU;
    private final int matrixSizeCPU;
    private final int peakWidthCPU;

    /**
     * Creates a controller and, unless CPU mode is requested, compiles the CUDA kernel.
     *
     * @param i  window size; substituted for {@code HiCCUPS_WINDOW} in the kernel source and
     *           used as the maximum window by the CPU implementation
     * @param i2 matrix edge length; substituted for {@code HiCCUPS_MATRIX_SIZE}, used to size
     *           the launch grid and the CPU result matrices
     * @param i3 peak width; substituted for {@code HiCCUPS_PEAK_WIDTH} and used by the CPU
     *           implementation
     * @param z  {@code true} to use the CPU implementation; no kernel is compiled in that case
     */
    public GPUController(int i, int i2, int i3, boolean z) {
        this.windowCPU = i;
        this.matrixSizeCPU = i2;
        this.peakWidthCPU = i3;
        this.useCPUVersionHiCCUPS = z;
        if (z) {
            // CPU mode never launches a kernel, so nothing is compiled.
            this.kernelLauncher = null;
            return;
        }
        this.kernelLauncher = KernelLauncher.compile(readCuFile("HiCCUPSKernel.cu", i, i2, i3), "BasicPeakCallingKernel", new String[0]);
        this.kernelLauncher.setBlockSize(blockSize, blockSize, 1);
        // Round up so the grid covers the whole matrix when i2 is not a multiple of blockSize.
        int blocksPerSide = (int) Math.ceil((i2 * 1.0d) / blockSize);
        this.kernelLauncher.setGridSize(blocksPerSide, blocksPerSide);
    }

    /**
     * Loads a kernel source template from the classpath (relative to this class) and fills in
     * its {@code HiCCUPS_*} placeholders.
     *
     * <p>An {@link IOException} while reading is printed and whatever was read so far is still
     * returned, matching the previous behaviour.
     *
     * @param resourceName name of the classpath resource holding the kernel template
     * @param window       value substituted for {@code HiCCUPS_WINDOW}
     * @param matrixSize   value substituted for {@code HiCCUPS_MATRIX_SIZE}
     * @param peakWidth    value substituted for {@code HiCCUPS_PEAK_WIDTH}
     * @return the kernel source with all placeholders replaced
     * @throws NullPointerException if the resource cannot be found
     */
    private static String readCuFile(String resourceName, int window, int matrixSize, int peakWidth) {
        StringBuilder kernelSource = new StringBuilder();
        // try-with-resources guarantees the reader is closed even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                Objects.requireNonNull(GPUController.class.getResourceAsStream(resourceName),
                        "CUDA kernel resource not found on classpath: " + resourceName),
                StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                kernelSource.append(line).append(IOUtils.LINE_SEPARATOR_UNIX);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Placeholders are plain literals, so literal replace() is used instead of regex replaceAll().
        return kernelSource.toString()
                .replace("HiCCUPS_WINDOW", String.valueOf(window))
                .replace("HiCCUPS_MATRIX_SIZE", String.valueOf(matrixSize))
                .replace("HiCCUPS_PEAK_WIDTH", String.valueOf(peakWidth))
                .replace("HiCCUPS_REGION_MARGIN", "20")
                .replace("HiCCUPS_W1_MAX_INDX", "39");
    }

    /**
     * Extracts the local matrix for one region and runs the HiCCUPS computation on it, on the
     * GPU or on the CPU depending on how this controller was constructed.
     *
     * <p>The four {@code float[]} tables are handed to the kernel unchanged; in the CPU path
     * each is indexed by the bin value computed for one of the four expected estimates.
     * NOTE(review): upstream these look like the bottom-left / donut / horizontal / vertical
     * threshold tables, in that order - confirm against the caller.
     *
     * @param hiCCUPSRegionHandler   source of zoom data, normalization vector and expected vector
     * @param hiCCUPSRegionContainer region descriptor supplying row and column bounds
     * @param i                      edge length of the extracted square matrix
     * @param fArr                   first lookup table
     * @param fArr2                  second lookup table
     * @param fArr3                  third lookup table
     * @param fArr4                  fourth lookup table
     * @param normalizationType      normalization passed to the matrix extraction
     * @param hiCZoom                zoom level used to fetch the region data
     * @return container holding the ten result matrices cropped to the region's window
     * @throws NegativeArraySizeException declared by the original signature
     * @throws IOException                declared by the original signature
     */
    public GPUOutputContainer process(HiCCUPSRegionHandler hiCCUPSRegionHandler, HiCCUPSRegionContainer hiCCUPSRegionContainer, int i, float[] fArr, float[] fArr2, float[] fArr3, float[] fArr4, NormalizationType normalizationType, HiCZoom hiCZoom) throws NegativeArraySizeException, IOException {
        MatrixZoomData zoomData = hiCCUPSRegionHandler.getZoomData(hiCCUPSRegionContainer, hiCZoom);
        double[] normalizationVector = hiCCUPSRegionHandler.getNormalizationVector(hiCCUPSRegionContainer, hiCZoom);
        double[] expectedVector = hiCCUPSRegionHandler.getExpectedVector(hiCCUPSRegionContainer, hiCZoom);
        int[] rowBounds = hiCCUPSRegionContainer.getRowBounds();
        int[] columnBounds = hiCCUPSRegionContainer.getColumnBounds();
        RealMatrix localRegion = HiCFileTools.extractLocalBoundedRegion(zoomData, rowBounds[0], rowBounds[1], columnBounds[0], columnBounds[1], i, i, normalizationType, false);

        float[] expected = Floats.toArray(Doubles.asList(expectedVector));
        float[] rowNorm = Floats.toArray(Doubles.asList(Arrays.copyOfRange(normalizationVector, rowBounds[0], rowBounds[1])));
        float[] colNorm = Floats.toArray(Doubles.asList(Arrays.copyOfRange(normalizationVector, columnBounds[0], columnBounds[1])));
        // Regions shorter than the matrix edge are padded with NaN up to length i.
        if (rowNorm.length < i) {
            rowNorm = ArrayTools.padEndOfArray(rowNorm, i, Float.NaN);
        }
        if (colNorm.length < i) {
            colNorm = ArrayTools.padEndOfArray(colNorm, i, Float.NaN);
        }
        // Single-element arrays so the start offsets can be passed like the other buffers.
        float[] rowOffset = {rowBounds[0]};
        float[] colOffset = {columnBounds[0]};

        if (this.useCPUVersionHiCCUPS) {
            return runCPUVersion(localRegion.getData(), expected, rowNorm, colNorm, rowOffset, colOffset, fArr, fArr2, fArr3, fArr4, rowBounds, columnBounds);
        }

        // Only the GPU path needs the flattened copy, so it is built after the CPU branch.
        float[] flatRegion = Floats.toArray(Doubles.asList(MatrixTools.flattenedRowMajorOrderMatrix(localRegion)));

        CUdeviceptr dRegion = GPUHelper.allocateInput(flatRegion);
        CUdeviceptr dExpectedVector = GPUHelper.allocateInput(expected);
        CUdeviceptr dRowNorm = GPUHelper.allocateInput(rowNorm);
        CUdeviceptr dColNorm = GPUHelper.allocateInput(colNorm);
        CUdeviceptr dTable1 = GPUHelper.allocateInput(fArr);
        CUdeviceptr dTable2 = GPUHelper.allocateInput(fArr2);
        CUdeviceptr dTable3 = GPUHelper.allocateInput(fArr3);
        CUdeviceptr dTable4 = GPUHelper.allocateInput(fArr4);
        CUdeviceptr dRowOffset = GPUHelper.allocateInput(rowOffset);
        CUdeviceptr dColOffset = GPUHelper.allocateInput(colOffset);

        int cellCount = i * i;
        // Output buffer names follow the slot each one fills in GPUOutputContainer, matched
        // against runCPUVersion. NOTE(review): the kernel source is not visible here - confirm.
        CUdeviceptr dExpected1 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dExpected2 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dExpected3 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dExpected4 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dBin1 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dBin2 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dBin3 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dBin4 = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dObserved = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);
        CUdeviceptr dPeak = GPUHelper.allocateOutput(cellCount, FLOAT_SIZE_BYTES);

        CUdeviceptr[] deviceBuffers = {dRegion, dExpectedVector, dRowNorm, dColNorm, dTable1, dTable2, dTable3, dTable4, dRowOffset, dColOffset, dExpected1, dExpected2, dExpected3, dExpected4, dBin1, dBin2, dBin3, dBin4, dObserved, dPeak};

        float[] expected1 = new float[cellCount];
        float[] expected2 = new float[cellCount];
        float[] expected3 = new float[cellCount];
        float[] expected4 = new float[cellCount];
        float[] bin1 = new float[cellCount];
        float[] bin2 = new float[cellCount];
        float[] bin3 = new float[cellCount];
        float[] bin4 = new float[cellCount];
        float[] observed = new float[cellCount];
        float[] peak = new float[cellCount];

        int byteCount = cellCount * FLOAT_SIZE_BYTES;
        try {
            this.kernelLauncher.call(dRegion, dExpected1, dExpected2, dExpected3, dExpected4, dObserved, dBin1, dBin2, dBin3, dBin4, dPeak, dTable1, dTable2, dTable3, dTable4, dExpectedVector, dRowNorm, dColNorm, dRowOffset, dColOffset);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(expected1), dExpected1, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(expected2), dExpected2, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(expected3), dExpected3, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(expected4), dExpected4, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(bin1), dBin1, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(bin2), dBin2, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(bin3), dBin3, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(bin4), dBin4, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(observed), dObserved, byteCount);
            JCudaDriver.cuMemcpyDtoH(Pointer.to(peak), dPeak, byteCount);
        } finally {
            // Release device memory even if the launch or a copy fails; previously a failure
            // here skipped the free call and leaked all twenty buffers.
            GPUHelper.freeUpMemory(deviceBuffers);
        }

        // Crop window: starts at bounds[2] and spans bounds[5] - bounds[4] cells per axis.
        int rowSpan = rowBounds[5] - rowBounds[4];
        int colSpan = columnBounds[5] - columnBounds[4];
        int rowStart = rowBounds[2];
        int colStart = columnBounds[2];
        int rowEnd = rowStart + rowSpan;
        int colEnd = colStart + colSpan;
        return new GPUOutputContainer(
                GPUHelper.GPUArraytoCPUMatrix(observed, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(peak, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(bin1, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(bin2, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(bin3, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(bin4, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(expected1, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(expected2, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(expected3, i, rowStart, rowEnd, colStart, colEnd),
                GPUHelper.GPUArraytoCPUMatrix(expected4, i, rowStart, rowEnd, colStart, colEnd));
    }

    /**
     * Pure-Java implementation of the per-pixel computation, used when the controller was
     * constructed in CPU mode.
     *
     * <p>For every cell at least 20 cells away from each matrix edge it accumulates observed
     * and expected sums over four neighbourhoods (a lower-left box, a square ring with the
     * centre row/column removed, a three-row strip along the centre row and a three-column
     * strip along the centre column), scales them into four expected values, converts each to
     * a bin index capped at 39, and stores the rounded normalised centre value together with
     * that value minus the largest of the four table lookups.
     *
     * @param dArr  square local matrix, indexed {@code [row][column]}
     * @param fArr  expected vector, indexed by the absolute row/column distance
     * @param fArr2 per-row factors, indexed by local row
     * @param fArr3 per-column factors, indexed by local column
     * @param fArr4 single-element array; its value minus {@code fArr5[0]} is the row/column offset
     * @param fArr5 single-element array, see {@code fArr4}
     * @param fArr6 lookup table indexed by the first bin value
     * @param fArr7 lookup table indexed by the second bin value
     * @param fArr8 lookup table indexed by the third bin value
     * @param fArr9 lookup table indexed by the fourth bin value
     * @param iArr  row bounds; entries 2, 4 and 5 define the crop window
     * @param iArr2 column bounds; entries 2, 4 and 5 define the crop window
     * @return container built from the ten result matrices cropped to the window
     */
    private GPUOutputContainer runCPUVersion(double[][] dArr, float[] fArr, float[] fArr2, float[] fArr3, float[] fArr4, float[] fArr5, float[] fArr6, float[] fArr7, float[] fArr8, float[] fArr9, int[] iArr, int[] iArr2) {
        // fArr10: rounded centre value; fArr11: centre value minus the largest table lookup.
        float[][] fArr10 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        float[][] fArr11 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        // fArr12..fArr15: capped bin indices for the four expected values.
        float[][] fArr12 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        float[][] fArr13 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        float[][] fArr14 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        float[][] fArr15 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        // fArr16..fArr19: the four expected values themselves.
        float[][] fArr16 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        float[][] fArr17 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        float[][] fArr18 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        float[][] fArr19 = new float[this.matrixSizeCPU][this.matrixSizeCPU];
        // Skip a 20-cell margin on every side so the neighbourhood windows stay in range.
        for (int i = 20; i < this.matrixSizeCPU - 20; i++) {
            for (int i2 = 20; i2 < this.matrixSizeCPU - 20; i2++) {
                // f/f2, f3/f4, f5/f6, f7/f8: observed/expected sums for the four neighbourhoods.
                float f = 0.0f;
                float f2 = 0.0f;
                float f3 = 0.0f;
                float f4 = 0.0f;
                float f5 = 0.0f;
                float f6 = 0.0f;
                float f7 = 0.0f;
                float f8 = 0.0f;
                // f9..f12: bin indices; they stay 0 when the expected value is NaN or infinite.
                float f9 = 0.0f;
                float f10 = 0.0f;
                float f11 = 0.0f;
                float f12 = 0.0f;
                int i3 = this.windowCPU;
                int i4 = this.matrixSizeCPU;
                int i5 = this.peakWidthCPU;
                // Offset between the row and column origins; added to the local row index
                // before it is compared with the local column index.
                int i6 = (int) (fArr4[0] - fArr5[0]);
                // Absolute row/column distance of the centre cell; indexes the expected vector.
                int abs = Math.abs((i + i6) - i2);
                int i7 = i4 - 20;
                // Window half-width: limited by the distance, forced above the peak width,
                // then capped at 20.
                int min = Math.min(i3, (abs - 1) / 2);
                if (min <= i5) {
                    min = i5 + 1;
                }
                int min2 = Math.min(min, 20);
                // Redundant with the loop bounds above; kept unchanged.
                if (i >= 20 && i < i7 && i2 >= 20 && i2 < i7) {
                    // Lower-left box: rows below and columns left of the centre, counting only
                    // non-NaN cells whose offset row index is smaller than the column index.
                    for (int i8 = i + 1; i8 <= i + min2; i8++) {
                        for (int i9 = i2 - min2; i9 < i2; i9++) {
                            if (!Double.isNaN(dArr[i8][i9]) && (i8 + i6) - i9 < 0) {
                                f = (float) (f + dArr[i8][i9]);
                                f2 += fArr[Math.abs((i8 + i6) - i9)];
                            }
                        }
                    }
                    // Remove the inner peak-width box from the lower-left sums.
                    for (int i10 = i + 1; i10 <= i + i5; i10++) {
                        for (int i11 = i2 - i5; i11 < i2; i11++) {
                            if (!Double.isNaN(dArr[i10][i11]) && (i10 + i6) - i11 < 0) {
                                f = (float) (f - dArr[i10][i11]);
                                f2 -= fArr[Math.abs((i10 + i6) - i11)];
                            }
                        }
                    }
                    // While the lower-left sum is below 16, widen the window by one and recompute
                    // it from scratch; stop once the window reaches 20 or twice the window
                    // reaches the distance.
                    while (f < 16.0f) {
                        f = 0.0f;
                        f2 = 0.0f;
                        min2++;
                        for (int i12 = i + 1; i12 <= i + min2; i12++) {
                            for (int i13 = i2 - min2; i13 < i2; i13++) {
                                if (!Double.isNaN(dArr[i12][i13]) && (i12 + i6) - i13 < 0) {
                                    f = (float) (f + dArr[i12][i13]);
                                    int abs2 = Math.abs((i12 + i6) - i13);
                                    f2 += fArr[abs2];
                                    // Cells inside the inner peak-width box are added and
                                    // immediately subtracted again, i.e. excluded.
                                    if (i12 >= i + 1 && i12 < i + i5 + 1 && i13 >= i2 - i5 && i13 < i2) {
                                        f = (float) (f - dArr[i12][i13]);
                                        f2 -= fArr[abs2];
                                    }
                                }
                            }
                        }
                        if (min2 >= 20 || 2 * min2 >= abs) {
                            break;
                        }
                    }
                    // Ring: start from the full square window around the centre ...
                    for (int i14 = i - min2; i14 <= i + min2; i14++) {
                        for (int i15 = i2 - min2; i15 <= i2 + min2; i15++) {
                            if (!Double.isNaN(dArr[i14][i15]) && (i14 + i6) - i15 < 0) {
                                f3 = (float) (f3 + dArr[i14][i15]);
                                f4 += fArr[Math.abs((i14 + i6) - i15)];
                            }
                        }
                    }
                    // ... subtract the inner peak-width square ...
                    for (int i16 = i - i5; i16 <= i + i5; i16++) {
                        for (int i17 = i2 - i5; i17 <= i2 + i5; i17++) {
                            if (!Double.isNaN(dArr[i16][i17]) && (i16 + i6) - i17 < 0) {
                                f3 = (float) (f3 - dArr[i16][i17]);
                                f4 -= fArr[Math.abs((i16 + i6) - i17)];
                            }
                        }
                    }
                    // ... and subtract the centre column above the inner square, while feeding
                    // the three-column strip sums (f7/f8). The strip sums are not NaN-guarded.
                    for (int i18 = i - min2; i18 < i - i5; i18++) {
                        if (!Double.isNaN(dArr[i18][i2])) {
                            f3 = (float) (f3 - dArr[i18][i2]);
                            f4 -= fArr[Math.abs((i18 + i6) - i2)];
                        }
                        for (int i19 = -1; i19 <= 1; i19++) {
                            f7 = (float) (f7 + dArr[i18][i2 + i19]);
                            f8 += fArr[Math.abs(((i18 + i6) - i2) - i19)];
                        }
                    }
                    // Same for the centre column below the inner square.
                    for (int i20 = i + i5 + 1; i20 <= i + min2; i20++) {
                        if (!Double.isNaN(dArr[i20][i2])) {
                            f3 = (float) (f3 - dArr[i20][i2]);
                            f4 -= fArr[Math.abs((i20 + i6) - i2)];
                        }
                        for (int i21 = -1; i21 <= 1; i21++) {
                            f7 = (float) (f7 + dArr[i20][i2 + i21]);
                            f8 += fArr[Math.abs(((i20 + i6) - i2) - i21)];
                        }
                    }
                    // Subtract the centre row left of the inner square, while feeding the
                    // three-row strip sums (f5/f6).
                    for (int i22 = i2 - min2; i22 < i2 - i5; i22++) {
                        if (!Double.isNaN(dArr[i][i22])) {
                            f3 = (float) (f3 - dArr[i][i22]);
                            f4 -= fArr[Math.abs((i + i6) - i22)];
                        }
                        for (int i23 = -1; i23 <= 1; i23++) {
                            f5 = (float) (f5 + dArr[i + i23][i22]);
                            f6 += fArr[Math.abs(((i + i23) + i6) - i22)];
                        }
                    }
                    // Same for the centre row right of the inner square.
                    for (int i24 = i2 + i5 + 1; i24 <= i2 + min2; i24++) {
                        if (!Double.isNaN(dArr[i][i24])) {
                            f3 = (float) (f3 - dArr[i][i24]);
                            f4 -= fArr[Math.abs((i + i6) - i24)];
                        }
                        for (int i25 = -1; i25 <= 1; i25++) {
                            f5 = (float) (f5 + dArr[i + i25][i24]);
                            f6 += fArr[Math.abs(((i + i25) + i6) - i24)];
                        }
                    }
                }
                // Expected value per neighbourhood: observed/expected ratio times the expected
                // value at the centre distance, scaled by the row and column factors.
                float f13 = ((f * fArr[abs]) / f2) * fArr2[i] * fArr3[i2];
                float f14 = ((f3 * fArr[abs]) / f4) * fArr2[i] * fArr3[i2];
                float f15 = ((f5 * fArr[abs]) / f6) * fArr2[i] * fArr3[i2];
                float f16 = ((f7 * fArr[abs]) / f8) * fArr2[i] * fArr3[i2];
                // Bin index is floor(log base 2^0.33 of the expected value); values <= 1 map to 0.
                float log = (float) Math.log(Math.pow(2.0d, 0.33d));
                if (!Float.isNaN(f13) && !Float.isInfinite(f13)) {
                    f9 = f13 <= 1.0f ? 0.0f : (float) Math.floor(Math.log(f13) / log);
                }
                if (!Float.isNaN(f14) && !Float.isInfinite(f14)) {
                    f10 = f14 <= 1.0f ? 0.0f : (float) Math.floor(Math.log(f14) / log);
                }
                if (!Float.isNaN(f15) && !Float.isInfinite(f15)) {
                    f11 = f15 <= 1.0f ? 0.0f : (float) Math.floor(Math.log(f15) / log);
                }
                if (!Float.isNaN(f16) && !Float.isInfinite(f16)) {
                    f12 = f16 <= 1.0f ? 0.0f : (float) Math.floor(Math.log(f16) / log);
                }
                // Cap the bin indices at 39 before they are used to index the lookup tables.
                float min3 = Math.min(f9, 39.0f);
                float min4 = Math.min(f10, 39.0f);
                float min5 = Math.min(f11, 39.0f);
                float min6 = Math.min(f12, 39.0f);
                fArr16[i][i2] = f13;
                fArr17[i][i2] = f14;
                fArr18[i][i2] = f15;
                fArr19[i][i2] = f16;
                // Centre value scaled by the row and column factors, rounded to a whole number.
                float round = (float) Math.round(dArr[i][i2] * fArr2[i] * fArr3[i2]);
                fArr10[i][i2] = round;
                fArr12[i][i2] = min3;
                fArr13[i][i2] = min4;
                fArr14[i][i2] = min5;
                fArr15[i][i2] = min6;
                // Margin of the centre value over the largest of the four table entries.
                fArr11[i][i2] = round - Math.max(Math.max(Math.max(fArr6[(int) min3], fArr7[(int) min4]), fArr8[(int) min5]), fArr9[(int) min6]);
            }
        }
        // Crop window: starts at bounds[2] and spans bounds[5] - bounds[4] cells per axis.
        int i26 = iArr[5] - iArr[4];
        int i27 = iArr2[5] - iArr2[4];
        int i28 = iArr[2];
        int i29 = iArr2[2];
        int i30 = i28 + i26;
        int i31 = i29 + i27;
        return new GPUOutputContainer(MatrixTools.extractLocalMatrixRegion(fArr10, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr11, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr12, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr13, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr14, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr15, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr16, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr17, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr18, i28, i30, i29, i31), MatrixTools.extractLocalMatrixRegion(fArr19, i28, i30, i29, i31));
    }
}
