|
|
@@ -0,0 +1,393 @@
|
|
|
+package com.uas.search.console.b2b.support;
|
|
|
+
|
|
|
+import com.uas.search.b2b.model.PageParams;
|
|
|
+import com.uas.search.b2b.service.SearchService;
|
|
|
+import com.uas.search.console.b2b.model.PageInfo;
|
|
|
+import com.uas.search.console.b2b.util.ClassAndTableNameUtils;
|
|
|
+import com.uas.search.console.b2b.util.FileUtils;
|
|
|
+import com.uas.search.console.b2b.util.SearchUtils;
|
|
|
+import com.uas.search.util.ArrayUtils;
|
|
|
+import com.uas.search.util.CollectionUtils;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+import org.springframework.data.jpa.repository.JpaRepository;
|
|
|
+
|
|
|
+import java.io.*;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.concurrent.*;
|
|
|
+
|
|
|
+import static com.uas.search.console.b2b.service.impl.IndexServiceImpl.PAGE_SIZE;
|
|
|
+import static com.uas.search.console.b2b.service.impl.IndexServiceImpl.SINGLE_FILE_MAX_SIZE;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 下载数据的辅助类
|
|
|
+ *
|
|
|
+ * @author sunyj
|
|
|
+ * @since 2017/11/25 17:30
|
|
|
+ */
|
|
|
+public class DownloadHelper<T> {
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 线程最小数量
|
|
|
+ */
|
|
|
+ private final int MIN_THREAD_SIZE = 1;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 线程最大数量
|
|
|
+ */
|
|
|
+ private final int MAX_THREAD_SIZE = 10000;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 默认开始的文件
|
|
|
+ */
|
|
|
+ private final int DEFAULT_START_FILE_INDEX = 1;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 默认结束的文件
|
|
|
+ */
|
|
|
+ private final int DEFAULT_END_FILE_INDEX = 1024 * 1024 * 1024;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 线程数量
|
|
|
+ */
|
|
|
+ private Integer threadSize;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 开始的文件
|
|
|
+ */
|
|
|
+ private Integer startFileIndex;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 结束的文件
|
|
|
+ */
|
|
|
+ private Integer endFileIndex;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 要下载的实体类
|
|
|
+ */
|
|
|
+ private Class<T> clazz;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 排序字段
|
|
|
+ */
|
|
|
+ private String sortField;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * dao
|
|
|
+ */
|
|
|
+ private JpaRepository<T, Long> dao;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 下载的实现
|
|
|
+ */
|
|
|
+ private DownloadService<T> downloadService;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 下载完成后,是否对结果进行校验
|
|
|
+ */
|
|
|
+ private ValidateResult validateResult;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 线程管理
|
|
|
+ */
|
|
|
+ private ExecutorService executorService;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 收集执行结果
|
|
|
+ */
|
|
|
+ private CompletionService<Long> completionService;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 要下载表的数据总行数
|
|
|
+ */
|
|
|
+ private Long totalElements;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 执行结果
|
|
|
+ */
|
|
|
+ private Long result;
|
|
|
+
|
|
|
+ private Logger logger = LoggerFactory.getLogger(getClass());
|
|
|
+
|
|
|
+ /**
|
|
|
+ * @param threadSize 线程数量
|
|
|
+ * @param startFileIndex 开始的文件
|
|
|
+ * @param endFileIndex 结束的文件
|
|
|
+ * @param clazz 要下载的实体类
|
|
|
+ * @param sortField 排序字段
|
|
|
+ * @param dao dao
|
|
|
+ * @param downloadService 下载的实现
|
|
|
+ */
|
|
|
+ public DownloadHelper(Integer threadSize, Integer startFileIndex, Integer endFileIndex, Class<T> clazz, String sortField, JpaRepository<T, Long> dao, DownloadService<T> downloadService) {
|
|
|
+ this(threadSize, startFileIndex, endFileIndex, clazz, sortField, dao, downloadService, ValidateResult.CURRENT);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * @param threadSize 线程数量
|
|
|
+ * @param startFileIndex 开始的文件
|
|
|
+ * @param endFileIndex 结束的文件
|
|
|
+ * @param clazz 要下载的实体类
|
|
|
+ * @param sortField 排序字段
|
|
|
+ * @param dao dao
|
|
|
+ * @param downloadService 下载的实现
|
|
|
+ * @param validateResult 下载完成后,是否对结果进行校验
|
|
|
+ */
|
|
|
+ public DownloadHelper(Integer threadSize, Integer startFileIndex, Integer endFileIndex, Class<T> clazz, String sortField, JpaRepository<T, Long> dao, DownloadService<T> downloadService, ValidateResult validateResult) {
|
|
|
+ if (threadSize == null || threadSize < MIN_THREAD_SIZE || threadSize > MAX_THREAD_SIZE) {
|
|
|
+ throw new IllegalArgumentException("threadSize is between " + MIN_THREAD_SIZE + " and " + MAX_THREAD_SIZE);
|
|
|
+ }
|
|
|
+ if (downloadService == null) {
|
|
|
+ throw new IllegalArgumentException("runnable is null");
|
|
|
+ }
|
|
|
+ this.threadSize = threadSize;
|
|
|
+ this.downloadService = downloadService;
|
|
|
+ this.startFileIndex = startFileIndex == null || startFileIndex < DEFAULT_START_FILE_INDEX ? DEFAULT_START_FILE_INDEX : startFileIndex;
|
|
|
+ this.endFileIndex = endFileIndex == null || endFileIndex < DEFAULT_START_FILE_INDEX ? DEFAULT_END_FILE_INDEX : endFileIndex;
|
|
|
+ this.clazz = clazz;
|
|
|
+ this.sortField = sortField;
|
|
|
+ this.dao = dao;
|
|
|
+ this.validateResult = validateResult;
|
|
|
+ start();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 开始下载
|
|
|
+ */
|
|
|
+ private void start() {
|
|
|
+ SearchService.Table_name tableName = ClassAndTableNameUtils.toTableName(clazz);
|
|
|
+ executorService = Executors.newCachedThreadPool();
|
|
|
+ completionService = new ExecutorCompletionService<>(executorService);
|
|
|
+ if (startFileIndex == DEFAULT_START_FILE_INDEX && endFileIndex == DEFAULT_END_FILE_INDEX) {
|
|
|
+ // 删除旧的文件
|
|
|
+ FileUtils.deleteSubFiles(new File(SearchUtils.getDataPath(tableName)));
|
|
|
+ }
|
|
|
+ getTotalElements();
|
|
|
+ logger.info(tableName + " 发现数据 " + totalElements + " 条");
|
|
|
+ // 线程数量不可高于下载的文件数量
|
|
|
+ threadSize = threadSize <= endFileIndex - startFileIndex + 1 ? threadSize : endFileIndex - startFileIndex + 1;
|
|
|
+ for (int i = 0; i < threadSize; i++) {
|
|
|
+ completionService.submit(getTask(i, threadSize, startFileIndex + i, endFileIndex, tableName, sortField, dao));
|
|
|
+ }
|
|
|
+ waitResult();
|
|
|
+ if (validateResult != null && validateResult != ValidateResult.NONE) {
|
|
|
+ // 对结果进行校验,只校验一定的次数,防止因特殊原因,某些数据始终无法成功下载,陷入死循环
|
|
|
+ int retry = 1;
|
|
|
+ List<Integer> missingFiles;
|
|
|
+ logger.info("校验下载结果");
|
|
|
+ while (retry <= 5 && !CollectionUtils.isEmpty(missingFiles = validateResult())) {
|
|
|
+ logger.error("第 " + retry + " 次校验,下载遗失的文件:" + missingFiles);
|
|
|
+ List<DownloadHelper<T>> downloadHelpers = new ArrayList<>();
|
|
|
+ for (Integer missingFile : missingFiles) {
|
|
|
+ downloadHelpers.add(new DownloadHelper<>(1, missingFile, missingFile, clazz, sortField, dao, downloadService, ValidateResult.NONE));
|
|
|
+ }
|
|
|
+ for (DownloadHelper downloadHelper : downloadHelpers) {
|
|
|
+ downloadHelper.getResult();
|
|
|
+ }
|
|
|
+ retry++;
|
|
|
+ }
|
|
|
+ if (!CollectionUtils.isEmpty(missingFiles = validateResult())) {
|
|
|
+ throw new IllegalStateException("部分数据下载失败:" + missingFiles);
|
|
|
+ } else{
|
|
|
+ logger.info(tableName.value() + " 数据下载成功:" + result + " 条");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取要下载表的数据总行数
|
|
|
+ */
|
|
|
+ private void getTotalElements() {
|
|
|
+ PageParams pageParams = new PageParams();
|
|
|
+ pageParams.setPage(startFileIndex);
|
|
|
+ pageParams.setSize(PAGE_SIZE);
|
|
|
+ PageInfo pageInfo = new PageInfo(pageParams);
|
|
|
+ totalElements = dao.findAll(pageInfo).getTotalElements();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取任务
|
|
|
+ *
|
|
|
+ * @param id 线程 id
|
|
|
+ * @param step 新增文件时,文件 id 的自增步长,(即线程数量)
|
|
|
+ * @param startFileIndex 开始的文件
|
|
|
+ * @param endFileIndex 结束的文件
|
|
|
+ * @param tableName 要下载的表
|
|
|
+ * @param sortField 排序字段
|
|
|
+ * @param dao dao
|
|
|
+ * @return 任务
|
|
|
+ */
|
|
|
+ private Callable<Long> getTask(final int id, final int step, final int startFileIndex, final int endFileIndex, final SearchService.Table_name tableName, final String sortField, final JpaRepository<T, Long> dao) {
|
|
|
+ return new Callable<Long>() {
|
|
|
+ @Override
|
|
|
+ public Long call() throws Exception {
|
|
|
+ return downloadService.download(id, step, startFileIndex, endFileIndex, tableName, sortField, dao);
|
|
|
+ }
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 等待执行结果
|
|
|
+ */
|
|
|
+ private void waitResult() {
|
|
|
+ if (executorService.isShutdown() || executorService.isTerminated()) {
|
|
|
+ throw new IllegalStateException("结果已返回,不可再次获取");
|
|
|
+ }
|
|
|
+ result = 0L;
|
|
|
+ for (int i = 0; i < threadSize; i++) {
|
|
|
+ try {
|
|
|
+ Future<Long> future = completionService.take();
|
|
|
+ Long count = future.get();
|
|
|
+ result += (count == null ? 0L : count);
|
|
|
+ } catch (InterruptedException | ExecutionException e) {
|
|
|
+ throw new IllegalStateException("获取下载结果失败", e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ executorService.shutdown();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取执行结果
|
|
|
+ *
|
|
|
+ * @return 下载的总数量
|
|
|
+ */
|
|
|
+ public long getResult() {
|
|
|
+ if (result == null) {
|
|
|
+ waitResult();
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 校验结果
|
|
|
+ *
|
|
|
+ * @return 未下载成功的文件
|
|
|
+ */
|
|
|
+ private List<Integer> validateResult() {
|
|
|
+ SearchService.Table_name tableName = ClassAndTableNameUtils.toTableName(clazz);
|
|
|
+ int totalFiles = (int) Math.ceil(totalElements / (1.0 * SINGLE_FILE_MAX_SIZE));
|
|
|
+ // 期待的起始、结束文件
|
|
|
+ final int expectStartFileIndex;
|
|
|
+ final int expectEndFileIndex;
|
|
|
+ if (validateResult == ValidateResult.CURRENT) {
|
|
|
+ // 校验本次下载的文件
|
|
|
+ expectStartFileIndex = startFileIndex;
|
|
|
+ expectEndFileIndex = endFileIndex < totalFiles ? endFileIndex : totalFiles;
|
|
|
+ } else if (validateResult == ValidateResult.ALL) {
|
|
|
+ // 校验所有文件
|
|
|
+ expectStartFileIndex = DEFAULT_START_FILE_INDEX;
|
|
|
+ expectEndFileIndex = totalFiles;
|
|
|
+ } else {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ List<Integer> missingFiles = new ArrayList<>();
|
|
|
+
|
|
|
+ File[] files = new File(SearchUtils.getDataPath(tableName)).listFiles(new FileFilter() {
|
|
|
+ @Override
|
|
|
+ public boolean accept(File pathname) {
|
|
|
+ // 只保留指定格式的文件,类似 0000000323.txt
|
|
|
+ if (!pathname.isFile() || !pathname.getName().matches("[0]*?[\\d]+?.txt")) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ int id = parseName(pathname);
|
|
|
+ return id >= expectStartFileIndex && id <= expectEndFileIndex;
|
|
|
+ }
|
|
|
+ });
|
|
|
+ if (ArrayUtils.isEmpty(files)) {
|
|
|
+ return missingFiles;
|
|
|
+ }
|
|
|
+ // 将文件按名称排序
|
|
|
+ sort(files);
|
|
|
+ // 默认把所有文件视为未下载成功
|
|
|
+ for (int i = expectStartFileIndex; i <= expectEndFileIndex; i++) {
|
|
|
+ missingFiles.add(i);
|
|
|
+ }
|
|
|
+ result = 0L;
|
|
|
+ for (File file : files) {
|
|
|
+ // 读取文件,如果文件行数正常(如果不是最后一个文件,行数必须超过限定值),根据文件 id 将其从 missingFiles 中移出
|
|
|
+ int id = parseName(file);
|
|
|
+ int readLineCount = readLineCount(file);
|
|
|
+ if (readLineCount >= SINGLE_FILE_MAX_SIZE || (id == totalFiles && readLineCount > 0)) {
|
|
|
+ result += readLineCount;
|
|
|
+ missingFiles.remove(new Integer(id));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return missingFiles;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取文件行数
|
|
|
+ *
|
|
|
+ * @param file 文件
|
|
|
+ * @return 行数
|
|
|
+ */
|
|
|
+ private int readLineCount(File file) {
|
|
|
+ try (BufferedReader bufferedReader = new BufferedReader(new FileReader(file))) {
|
|
|
+ int count = 0;
|
|
|
+ while (bufferedReader.readLine() != null) {
|
|
|
+ count++;
|
|
|
+ }
|
|
|
+ return count;
|
|
|
+ } catch (IOException e) {
|
|
|
+ throw new IllegalStateException("校验下载的文件时,读取文件出错", e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 根据文件的名称进行排序(增序)
|
|
|
+ *
|
|
|
+ * @param files
|
|
|
+ */
|
|
|
+ public void sort(File[] files) {
|
|
|
+ int N = files.length;
|
|
|
+ int h = 1;
|
|
|
+ while (h < N / 3) {
|
|
|
+ h = h * 3 + 1;
|
|
|
+ }
|
|
|
+ while (h >= 1) {
|
|
|
+ for (int i = h; i < N; i++) {
|
|
|
+ for (int j = i; j >= h && less(files[j], files[j - h]); j -= h) {
|
|
|
+ exchange(files, j, j - h);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ h /= 3;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private boolean less(File f1, File f2) {
|
|
|
+ return parseName(f1) - parseName(f2) < 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 解析文件名称为数字
|
|
|
+ */
|
|
|
+ private int parseName(File f) {
|
|
|
+ return Integer.parseInt(f.getName().replaceAll(".txt", ""));
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 交换位置
|
|
|
+ */
|
|
|
+ private void exchange(File[] files, int i, int j) {
|
|
|
+ File temp = files[i];
|
|
|
+ files[i] = files[j];
|
|
|
+ files[j] = temp;
|
|
|
+ }
|
|
|
+
|
|
|
+ public enum ValidateResult {
|
|
|
+ /**
|
|
|
+ * 不校验
|
|
|
+ */
|
|
|
+ NONE,
|
|
|
+ /**
|
|
|
+ * 校验所有文件
|
|
|
+ */
|
|
|
+ ALL,
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 只校验本次下载的文件
|
|
|
+ */
|
|
|
+ CURRENT
|
|
|
+ }
|
|
|
+
|
|
|
+}
|