From fff87fe835aada148115f167ee27d774f38ec98a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?= Date: Wed, 18 Nov 2020 15:17:51 +0700 Subject: [PATCH 01/13] Adding train/val script --- configs/Base-RCNN-FPN.yaml | 42 ++++++ configs/DLA_mask_rcnn_R_101_FPN_3x.yaml | 18 +++ configs/Mask-RCNN.yaml | 0 tools/train.py | 168 ++++++++++++++++++++++++ val.py => tools/val.py | 0 tools/visualize_json_result.py | 90 +++++++++++++ train.py | 1 - 7 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 configs/DLA_mask_rcnn_R_101_FPN_3x.yaml delete mode 100644 configs/Mask-RCNN.yaml create mode 100644 tools/train.py rename val.py => tools/val.py (100%) create mode 100644 tools/visualize_json_result.py delete mode 100644 train.py diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml index e69de29..d40fe5e 100644 --- a/configs/Base-RCNN-FPN.yaml +++ b/configs/Base-RCNN-FPN.yaml @@ -0,0 +1,42 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 \ No newline at end of file diff --git a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000..ba70017 --- /dev/null +++ b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 5 +DATASETS: + TRAIN: ("dla_train",) + TEST: ("dla_val",) +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 75500 + IMS_PER_BATCH: 2 + BASE_LR: 0.0009 +DATALOADER: + NUM_WORKERS: 1 \ No newline at end of file diff --git a/configs/Mask-RCNN.yaml b/configs/Mask-RCNN.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000..238cd57 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Detection Training Script. + +This scripts reads a given config file and runs the training or evaluation. +It is an entry point that is made to train standard models in detectron2. 
+ +In order to let one script support training of many models, +this script contains logic that are specific to these built-in models and therefore +may not be suitable for your own project. +For example, your research project perhaps only needs a single "evaluator". + +Therefore, we recommend you to use detectron2 as an library and take +this file as an example of how to use the library. +You may want to write your own script with your datasets and other customizations. +""" + +import logging +import os +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import MetadataCatalog +from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import ( + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator, + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + verify_results, +) +from detectron2.modeling import GeneralizedRCNNWithTTA + + +class Trainer(DefaultTrainer): + """ + We use the "DefaultTrainer" which contains pre-defined default logic for + standard training workflow. They may not work for you, especially if you + are working on a new research project. In that case you can write your + own training loop. You can use "tools/plain_train_net.py" as an example. + """ + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." 
+ return CityscapesSemSegEvaluator(dataset_name) + elif evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + elif evaluator_type == "lvis": + return LVISEvaluator(dataset_name, cfg, True, output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format( + dataset_name, evaluator_type + ) + ) + elif len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + @classmethod + def test_with_TTA(cls, cfg, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. + logger.info("Running inference with test-time augmentation ...") + model = GeneralizedRCNNWithTTA(cfg, model) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res + + +def setup(args): + """ + Create configs and perform basic setups. + """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + cfg = setup(args) + + if args.eval_only: + model = Trainer.build_model(cfg) + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + """ + If you'd like to do anything fancier than the standard training logic, + consider writing your own training loop (see plain_train_net.py) or + subclassing the trainer. + """ + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) \ No newline at end of file diff --git a/val.py b/tools/val.py similarity index 100% rename from val.py rename to tools/val.py diff --git a/tools/visualize_json_result.py b/tools/visualize_json_result.py new file mode 100644 index 0000000..ad8c8ed --- /dev/null +++ b/tools/visualize_json_result.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. 
+
+import argparse
+import json
+import numpy as np
+import os
+from collections import defaultdict
+import cv2
+import tqdm
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import Boxes, BoxMode, Instances
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import setup_logger
+from detectron2.utils.visualizer import Visualizer
+
+
+def create_instances(predictions, image_size):
+    # NOTE: relies on the globals `args` and `dataset_id_map` defined under __main__.
+    ret = Instances(image_size)
+
+    score = np.asarray([x["score"] for x in predictions])
+    chosen = (score > args.conf_threshold).nonzero()[0]
+    score = score[chosen]
+    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
+    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+
+    labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])
+
+    ret.scores = score
+    ret.pred_boxes = Boxes(bbox)
+    ret.pred_classes = labels
+
+    try:
+        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
+    except KeyError:
+        pass
+    return ret
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="A script that visualizes the json predictions from COCO or LVIS dataset."
+    )
+    parser.add_argument("--input", required=True, help="JSON file produced by the model")
+    parser.add_argument("--output", required=True, help="output directory")
+    parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val")
+    parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold")
+    args = parser.parse_args()
+
+    logger = setup_logger()
+
+    with PathManager.open(args.input, "r") as f:
+        predictions = json.load(f)
+
+    pred_by_image = defaultdict(list)
+    for p in predictions:
+        pred_by_image[p["image_id"]].append(p)
+
+    dicts = list(DatasetCatalog.get(args.dataset))
+    metadata = MetadataCatalog.get(args.dataset)
+    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
+
+        def dataset_id_map(ds_id):
+            return metadata.thing_dataset_id_to_contiguous_id[ds_id]
+
+    elif "lvis" in args.dataset:
+        # LVIS results are in the same format as COCO results, but have a different
+        # mapping from dataset category id to contiguous category id in [0, #categories - 1]
+        def dataset_id_map(ds_id):
+            return ds_id - 1
+
+    else:
+        raise ValueError("Unsupported dataset: {}".format(args.dataset))
+
+    os.makedirs(args.output, exist_ok=True)
+
+    for dic in tqdm.tqdm(dicts):
+        img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
+        basename = os.path.basename(dic["file_name"])
+
+        predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
+        vis = Visualizer(img, metadata)
+        vis_pred = vis.draw_instance_predictions(predictions).get_image()
+
+        vis = Visualizer(img, metadata)
+        vis_gt = vis.draw_dataset_dict(dic).get_image()
+
+        concat = np.concatenate((vis_pred, vis_gt), axis=1)
+        cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])
\ No newline at end of file
diff --git a/train.py b/train.py
deleted file mode 100644
index fc80254..0000000
--- a/train.py
+++ /dev/null
@@ -1 +0,0 @@
-pass
\ No newline at end of file

From 1f071947546cd9ecd1a130c2c29e4f46010eda9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?=
Date: Wed, 18 Nov 2020 16:44:34 +0700
Subject: [PATCH 02/13] Adding config to train on custom dataset

---
 tools/preprocess.py | 28 ++++++++++++++++++++++++++++
 tools/train.py      |  3 +++
 tools/val.py        | 0
 3 files changed, 31 insertions(+)
 create mode 100644 tools/preprocess.py
delete mode 100644 tools/val.py diff --git a/tools/preprocess.py b/tools/preprocess.py new file mode 100644 index 0000000..ecddb1c --- /dev/null +++ b/tools/preprocess.py @@ -0,0 +1,28 @@ +from detectron2.data.datasets import register_coco_instances + + +PUBLAYNET_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "Background"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "Text"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "Title"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "List"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "Table"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "Figure"}, +] + +def _get_publaynet_instances_meta(): + thing_ids = [k["id"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] + assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous COCO category id to an id in [0, 79] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + "thing_colors": thing_colors, + } + return ret +def register_publaynet_dataset(): + register_coco_instances("publaynet_train", _get_publaynet_instances_meta(), "../datasets/publaynet/train.json", "../datasets/publaynet/train.json") + register_coco_instances("publaynet_val", _get_publaynet_instances_meta(), "../datasets/publaynet/val.json", "../datasets/publaynet/val.json") \ No newline at end of file diff --git a/tools/train.py b/tools/train.py index 238cd57..a9dd4e6 100644 --- a/tools/train.py +++ b/tools/train.py @@ -38,7 +38,9 @@ verify_results, ) from detectron2.modeling import GeneralizedRCNNWithTTA +from .preprocess import register_publaynet_dataset +register_publaynet_dataset() class Trainer(DefaultTrainer): """ @@ -156,6 +158,7 @@ def main(args): if __name__ == "__main__": + args = default_argument_parser().parse_args() print("Command Line Args:", args) launch( diff --git a/tools/val.py b/tools/val.py deleted file mode 100644 index e69de29..0000000 From 4457825d8f0aec32b74fdd8bf5d0550353c0aac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?= Date: Wed, 18 Nov 2020 16:45:39 +0700 Subject: [PATCH 03/13] Change path of model to custom datasets instead of coco dataset --- configs/Base-RCNN-FPN.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml index d40fe5e..c1b54cf 100644 --- a/configs/Base-RCNN-FPN.yaml +++ b/configs/Base-RCNN-FPN.yaml @@ -30,8 +30,10 @@ MODEL: NUM_CONV: 4 POOLER_RESOLUTION: 14 DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) + TRAIN: ('publaynet_train') + TEST: ('publaynet_train') + # TRAIN: ("coco_2017_train",) + # TEST: ("coco_2017_val",) SOLVER: IMS_PER_BATCH: 16 BASE_LR: 0.02 From 30ab5f21c6f29045ecdcdd4ed72bdb6bfbc27106 Mon Sep 17 00:00:00 2001 From: vietnamican Date: Wed, 18 Nov 2020 23:24:42 +0700 Subject: [PATCH 04/13] Adding file to train the publaynet data --- .gitignore | 3 +++ __init__.py | 0 configs/Base-RCNN-FPN.yaml | 6 ++---- configs/DLA_mask_rcnn_R_101_FPN_3x.yaml | 6 +++--- configs/faster_rcnn_R_101_FPN_3x.yaml | 18 ++++++++++++++++++ tools/__init__.py | 1 + tools/preprocess.py | 7 +++---- tools/train.py => train.py | 3 ++- 8 files changed, 32 
insertions(+), 12 deletions(-) create mode 100644 __init__.py create mode 100644 configs/faster_rcnn_R_101_FPN_3x.yaml create mode 100644 tools/__init__.py rename tools/train.py => train.py (99%) diff --git a/.gitignore b/.gitignore index e69de29..ebaeab5 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,3 @@ +datasets/ +output/ +**__pycache__** \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml index c1b54cf..d40fe5e 100644 --- a/configs/Base-RCNN-FPN.yaml +++ b/configs/Base-RCNN-FPN.yaml @@ -30,10 +30,8 @@ MODEL: NUM_CONV: 4 POOLER_RESOLUTION: 14 DATASETS: - TRAIN: ('publaynet_train') - TEST: ('publaynet_train') - # TRAIN: ("coco_2017_train",) - # TEST: ("coco_2017_val",) + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) SOLVER: IMS_PER_BATCH: 16 BASE_LR: 0.02 diff --git a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml index ba70017..d88a25a 100644 --- a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml +++ b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml @@ -5,10 +5,10 @@ MODEL: RESNETS: DEPTH: 101 ROI_HEADS: - NUM_CLASSES: 5 + BATCH_SIZE_PER_IMAGE: 512 DATASETS: - TRAIN: ("dla_train",) - TEST: ("dla_val",) + TRAIN: ("publaynet_train",) + TEST: ("publaynet_val",) SOLVER: STEPS: (210000, 250000) MAX_ITER: 75500 diff --git a/configs/faster_rcnn_R_101_FPN_3x.yaml b/configs/faster_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000..28d7f07 --- /dev/null +++ b/configs/faster_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 512 +DATASETS: + TRAIN: ("publaynet_train",) + TEST: ("publaynet_val",) +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 75500 + IMS_PER_BATCH: 2 + BASE_LR: 0.0009 +DATALOADER: + NUM_WORKERS: 1 diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000..66824d3 --- /dev/null +++ b/tools/__init__.py @@ -0,0 +1 @@ +from .preprocess import register_publaynet_dataset \ No newline at end of file diff --git a/tools/preprocess.py b/tools/preprocess.py index ecddb1c..9dd8797 100644 --- a/tools/preprocess.py +++ b/tools/preprocess.py @@ -1,6 +1,5 @@ from detectron2.data.datasets import register_coco_instances - PUBLAYNET_CATEGORIES = [ {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "Background"}, {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "Text"}, @@ -13,7 +12,7 @@ def _get_publaynet_instances_meta(): thing_ids = [k["id"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] thing_colors = [k["color"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] - assert len(thing_ids) == 80, len(thing_ids) + assert len(thing_ids) == 6, len(thing_ids) # Mapping from the incontiguous COCO category id to an id in [0, 79] thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} thing_classes = [k["name"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] @@ -24,5 +23,5 @@ def _get_publaynet_instances_meta(): } return ret def register_publaynet_dataset(): - register_coco_instances("publaynet_train", _get_publaynet_instances_meta(), "../datasets/publaynet/train.json", "../datasets/publaynet/train.json") - register_coco_instances("publaynet_val", _get_publaynet_instances_meta(), "../datasets/publaynet/val.json", "../datasets/publaynet/val.json") \ No newline at end of file + 
register_coco_instances("publaynet_train", {}, "datasets/publaynet/val.json", "datasets/publaynet/val/") + register_coco_instances("publaynet_val", {}, "datasets/publaynet/val.json", "datasets/publaynet/val/") \ No newline at end of file diff --git a/tools/train.py b/train.py similarity index 99% rename from tools/train.py rename to train.py index a9dd4e6..b0538ba 100644 --- a/tools/train.py +++ b/train.py @@ -38,7 +38,8 @@ verify_results, ) from detectron2.modeling import GeneralizedRCNNWithTTA -from .preprocess import register_publaynet_dataset + +from tools import register_publaynet_dataset register_publaynet_dataset() From 0bfc3a09b82e414305b3b7e5cd5667896354acc8 Mon Sep 17 00:00:00 2001 From: vietnamican Date: Wed, 18 Nov 2020 23:38:44 +0700 Subject: [PATCH 05/13] Update README.md, add the way of organizing the directory --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 9b612eb..1a86282 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,16 @@ # Document-Layout-Analysis Tools for extract figure, table, text,... from a pdf document + +The directories should be arranged like this + + root + ├── mmdet + ├── tools + ├── configs + ├── datasets + │ ├── publaynet + │ │ ├── test + │ │ ├── train + │ │ ├── val + │ │ ├── train.json + | | ├── val.json \ No newline at end of file From 8da0ee183818762faa9ebdb19de1588565520650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:45:35 +0700 Subject: [PATCH 06/13] Add training instruction, add requirements file --- README.md | 73 +++++++++++++++++-- ...PN_3x.yaml => mask_rcnn_R_101_FPN_3x.yaml} | 0 requirements.txt | 4 + 3 files changed, 72 insertions(+), 5 deletions(-) rename configs/{DLA_mask_rcnn_R_101_FPN_3x.yaml => mask_rcnn_R_101_FPN_3x.yaml} (100%) create mode 100644 requirements.txt diff --git a/README.md b/README.md index 1a86282..78859a4 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,79 @@ # Document-Layout-Analysis Tools for extract figure, table, text,... from a pdf document +## Installation +``` +$ pip install -r requirements.txt +``` +### Install detectron2 +Requirment +- CUDA=10.1 +- PyTorch>=1.7.0 -The directories should be arranged like this +How to install CUDA 10.1 can be found here: https://developer.nvidia.com/cuda-10.1-download-archive-base + +How to install PyTorch can be found here: https://pytorch.org/ + +Afer installed above package, follow the instructions below to install Document-Layout-Analysis: +``` +$ git clone https://github.com/facebookresearch/detectron2.git +$ git checkout 8e3effc +$ python -m pip install -e detectron2 +``` +### Install Document-Layout-Analysis +Follow the instructions below: +``` +$ git clone -b dev https://github.com/Wild-Rift/Document-Layout-Analysis.git +$ cd Document-Layout-Analysis +``` + +## Train +### Dataset + +We use [IBM Publaynet](https://developer.ibm.com/technologies/artificial-intelligence/data/publaynet/) dataset for training and testing. + +It includes 358,353 images, 335,703 training images, 11,245 validation images and 11,405 test images. The category-id label mapping of this dataset is: +| Category id | Label | +| :---: | :--- | +| 1 | Text | +| 2 | Title | +| 3 | List | +| 4 | Table | +| 5 | Figure | + +After download and extract dataset, please put it in ```datasets``` directory. The directories should be arranged like this: root ├── mmdet ├── tools ├── configs + ├── output + │ ├──... 
+ │ ├── datasets │ ├── publaynet - │ │ ├── test - │ │ ├── train - │ │ ├── val + │ │ ├── test/ + │ │ ├── train/ + │ │ ├── val/ │ │ ├── train.json - | | ├── val.json \ No newline at end of file + │ │ ├── val.json + +### Training +Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN + +``` +$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model +$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' #if use Mask-RCNN model +``` +If you want to inspect model's structures, go to ```configs/``` directory + +If you want to training on 8 GPU, run: +``` +$ python train.py --num-gpus 8 --config-file CONFIG_FILE +``` +If you want to training on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), run: +``` +$ python train.py --num-gpus 1 \ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ + SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 +``` +Checkpoints of model will be store in ```output/``` directory after each epoch. diff --git a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml b/configs/mask_rcnn_R_101_FPN_3x.yaml similarity index 100% rename from configs/DLA_mask_rcnn_R_101_FPN_3x.yaml rename to configs/mask_rcnn_R_101_FPN_3x.yaml diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c405636 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +numpy=1.9.0 +pytorch=1.7.0 +torchvision=0.8.1 +pyyaml==5.1 \ No newline at end of file From 46693a52dd45eb9271f4c93f78452d3e10509c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:50:41 +0700 Subject: [PATCH 07/13] Fix bug in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 78859a4..207b693 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ How to install CUDA 10.1 can be found here: https://developer.nvidia.com/cuda-10 How to install PyTorch can be found here: https://pytorch.org/ -Afer installed above package, follow the instructions below to install Document-Layout-Analysis: +Afer installed above package, follow the instructions below to install detectron2: ``` $ git clone https://github.com/facebookresearch/detectron2.git $ git checkout 8e3effc From 456a605c1fc6de5fc7f86826aa8fdf36b8c8b118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:51:50 +0700 Subject: [PATCH 08/13] Add training instruction, add requirements file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 207b693..64a74e5 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ $ python train.py --num-gpus 8 --config-file CONFIG_FILE If you want to training on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), run: ``` $ python train.py --num-gpus 1 \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ + --config-file CONFIG_FILE \ SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 ``` Checkpoints of model will be store in ```output/``` directory after each epoch. 
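A note on the dataset registration the patches above rely on: `tools/preprocess.py` boils down to detectron2's `register_coco_instances` helper, and `train.py` simply calls it before `launch`. The registration can be exercised on its own as a sanity check before starting a run. The sketch below is not part of the patch series; it assumes the README's directory layout (`datasets/publaynet/val.json`, `datasets/publaynet/val/`) and only uses detectron2's standard catalog API.

```
# Sanity-check sketch (not part of the patches): confirm the PubLayNet
# COCO registration resolves before launching a training run.
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances

# Same call tools/preprocess.py makes:
# (dataset name, extra metadata dict, COCO-format json, image root).
register_coco_instances(
    "publaynet_val", {}, "datasets/publaynet/val.json", "datasets/publaynet/val/"
)

dicts = DatasetCatalog.get("publaynet_val")  # parses the COCO json here
meta = MetadataCatalog.get("publaynet_val")  # class names are filled in from the json
print(len(dicts), "images; classes:", meta.thing_classes)
```

If this prints the expected image count and the PubLayNet classes (Text, Title, List, Table, Figure), the paths match what `train.py` will load.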
From d347a63bdbcdec899dce2593fcc1c282f2749560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:53:04 +0700 Subject: [PATCH 09/13] Fix bug in README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 64a74e5..ac11e0a 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,8 @@ After download and extract dataset, please put it in ```datasets``` directory. T Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN ``` -$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model -$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' #if use Mask-RCNN model +$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model +$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' # if use Mask-RCNN model ``` If you want to inspect model's structures, go to ```configs/``` directory From c7565adce07a96bf6b65700423a7f1ea54037ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:53:42 +0700 Subject: [PATCH 10/13] Fix bug in README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ac11e0a..58cdc7b 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,8 @@ After download and extract dataset, please put it in ```datasets``` directory. T Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN ``` -$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model -$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' # if use Mask-RCNN model +$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if use Faster-RCNN model +$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x.yaml' # if use Mask-RCNN model ``` If you want to inspect model's structures, go to ```configs/``` directory From 456e2f034c2e03ae52515428264d77297b61d8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 17:00:16 +0700 Subject: [PATCH 11/13] Fix bug in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 58cdc7b..356faf1 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ After download and extract dataset, please put it in ```datasets``` directory. 
T
 │   │   ├── val.json
 
 ### Training
-Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN
+Document-Layout-Analysis support training on two models: Faster-RCNN and Mask-RCNN
 
 ```
 $ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if use Faster-RCNN model

From 3d676d9aac7983792edc551bd97373b6aac89dc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?=
Date: Sat, 21 Nov 2020 20:16:35 +0700
Subject: [PATCH 12/13] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 356faf1..4328f32 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ Document-Layout-Analysis support training on two models: Faster-RCNN and Mask-RC
 $ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if use Faster-RCNN model
 $ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x.yaml' # if use Mask-RCNN model
 ```
-If you want to inspect model's structures, go to ```configs/``` directory
+If you want to inspect model's structures, go to ```configs``` directory
 
 If you want to training on 8 GPU, run:
@@ -76,4 +76,4 @@ $ python train.py --num-gpus 1 \
    --config-file CONFIG_FILE \
    SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
 ```
-Checkpoints of model will be store in ```output/``` directory after each epoch.
+Checkpoints of model will be store in ```output``` directory after each epoch.

From 153f8e882bca3ee462bf05969bfbd48c05cd468c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?=
 =?UTF-8?q?KHDL=29?=
Date: Wed, 25 Nov 2020 16:27:03 +0700
Subject: [PATCH 13/13] Fix conflict README.md

---
 README.md | 83 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 486f137..11b403a 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,79 @@
-# Document Layout Analysis
-> Tools for extract figure, table, text,... from a pdf document
+# Document-Layout-Analysis
+Tools for extracting figures, tables, text, ... from a PDF document
+## Installation
+```
+$ pip install -r requirements.txt
+```
+### Install detectron2
+Requirements
+- CUDA=10.1
+- PyTorch>=1.7.0
 
+How to install CUDA 10.1 can be found here: https://developer.nvidia.com/cuda-10.1-download-archive-base
 
-## About The Project
+How to install PyTorch can be found here: https://pytorch.org/
 
-![](./image/demo.png)
+After installing the above packages, follow the instructions below to install detectron2:
+```
+$ git clone https://github.com/facebookresearch/detectron2.git
+$ git checkout 8e3effc
+$ python -m pip install -e detectron2
+```
+### Install Document-Layout-Analysis
+Follow the instructions below:
+```
+$ git clone -b dev https://github.com/Wild-Rift/Document-Layout-Analysis.git
+$ cd Document-Layout-Analysis
+```
 
-## Installation
+## Train
+### Dataset
 
-See [Installed](https://github.com/Wild-Rift/Document-Layout-Analysis/tree/staging).
+We use the [IBM PubLayNet](https://developer.ibm.com/technologies/artificial-intelligence/data/publaynet/) dataset for training and testing.
 
-## Training model
+It includes 358,353 images: 335,703 training images, 11,245 validation images, and 11,405 test images.
+The category-id label mapping of this dataset is:
+| Category id | Label |
+| :---: | :--- |
+| 1 | Text |
+| 2 | Title |
+| 3 | List |
+| 4 | Table |
+| 5 | Figure |
 
-See [Train model](https://github.com/Wild-Rift/Document-Layout-Analysis/tree/dev)
+After downloading and extracting the dataset, put it in the ```datasets``` directory. The directories should be arranged like this:
 
-## Evalutate model
+    root
+    ├── mmdet
+    ├── tools
+    ├── configs
+    ├── output
+    │   ├──...
+    │
+    ├── datasets
+    │   ├── publaynet
+    │   │   ├── test/
+    │   │   ├── train/
+    │   │   ├── val/
+    │   │   ├── train.json
+    │   │   ├── val.json
 
-See [Colab Notebook](https://colab.research.google.com/drive/1WBzVAgLdldrX6Gs1lbUaUPF63fkjcw4t?usp=sharing)
+### Training
 
-## My Team.
+Document-Layout-Analysis supports training two models: Faster-RCNN and Mask-RCNN
 
-1. [Pham Van Tien](https://github.com/vietnamican)
-2. [Nguyen Trung Duc](https://github.com/caoboiyb)
-3. [Tran Tien Quan](https://github.com/Lill98)
-4. [Bui Xuan Thoai](https://github.com/ThanThoai)
\ No newline at end of file
+```
+$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if using the Faster-RCNN model
+$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x.yaml' # if using the Mask-RCNN model
+```
+If you want to inspect the model structures, see the ```configs``` directory.
 
+To train on 8 GPUs, run:
+```
+$ python train.py --num-gpus 8 --config-file CONFIG_FILE
+```
+To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677); run:
+```
+$ python train.py --num-gpus 1 \
+   --config-file CONFIG_FILE \
+   SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
+```
+Model checkpoints will be stored in the ```output``` directory periodically during training.
\ No newline at end of file
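Once training finishes, the checkpoint in `output/` can be used for inference. The patch series does not include an inference script (`tools/val.py` is empty when it appears), so the following is only a sketch: it uses detectron2's stock `DefaultPredictor`, and the checkpoint and image paths (`output/model_final.pth`, `page.png`) are assumptions rather than files the patches create.

```
# Inference sketch (not part of the patches): run a trained checkpoint
# on a single page image and print the detected layout regions.
import cv2
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file("configs/mask_rcnn_R_101_FPN_3x.yaml")  # resolves _BASE_ itself
cfg.MODEL.WEIGHTS = "output/model_final.pth"  # assumed checkpoint written by train.py
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # drop low-confidence detections

predictor = DefaultPredictor(cfg)
outputs = predictor(cv2.imread("page.png"))   # DefaultPredictor expects a BGR array
instances = outputs["instances"].to("cpu")
print(instances.pred_classes)  # contiguous category ids of the registered classes
print(instances.scores)
```

`DefaultPredictor` runs the model in evaluation mode with the config's test-time transforms, which is enough for spot-checking a checkpoint; for proper metrics, the `--eval-only` path already present in `train.py` runs `COCOEvaluator` over the validation set.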