From fff87fe835aada148115f167ee27d774f38ec98a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?= Date: Wed, 18 Nov 2020 15:17:51 +0700 Subject: [PATCH 01/13] Adding train/val script --- configs/Base-RCNN-FPN.yaml | 42 ++++++ configs/DLA_mask_rcnn_R_101_FPN_3x.yaml | 18 +++ configs/Mask-RCNN.yaml | 0 tools/train.py | 168 ++++++++++++++++++++++++ val.py => tools/val.py | 0 tools/visualize_json_result.py | 90 +++++++++++++ train.py | 1 - 7 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 configs/DLA_mask_rcnn_R_101_FPN_3x.yaml delete mode 100644 configs/Mask-RCNN.yaml create mode 100644 tools/train.py rename val.py => tools/val.py (100%) create mode 100644 tools/visualize_json_result.py delete mode 100644 train.py diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml index e69de29..d40fe5e 100644 --- a/configs/Base-RCNN-FPN.yaml +++ b/configs/Base-RCNN-FPN.yaml @@ -0,0 +1,42 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 \ No newline at end of file diff --git a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000..ba70017 --- /dev/null +++ b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 5 +DATASETS: + TRAIN: ("dla_train",) + TEST: ("dla_val",) +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 75500 + IMS_PER_BATCH: 2 + BASE_LR: 0.0009 +DATALOADER: + NUM_WORKERS: 1 \ No newline at end of file diff --git a/configs/Mask-RCNN.yaml b/configs/Mask-RCNN.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000..238cd57 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +""" +Detection Training Script. + +This scripts reads a given config file and runs the training or evaluation. +It is an entry point that is made to train standard models in detectron2. 
+ +In order to let one script support training of many models, +this script contains logic that are specific to these built-in models and therefore +may not be suitable for your own project. +For example, your research project perhaps only needs a single "evaluator". + +Therefore, we recommend you to use detectron2 as an library and take +this file as an example of how to use the library. +You may want to write your own script with your datasets and other customizations. +""" + +import logging +import os +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import MetadataCatalog +from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import ( + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator, + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + verify_results, +) +from detectron2.modeling import GeneralizedRCNNWithTTA + + +class Trainer(DefaultTrainer): + """ + We use the "DefaultTrainer" which contains pre-defined default logic for + standard training workflow. They may not work for you, especially if you + are working on a new research project. In that case you can write your + own training loop. You can use "tools/plain_train_net.py" as an example. + """ + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." 
+ return CityscapesSemSegEvaluator(dataset_name) + elif evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + elif evaluator_type == "lvis": + return LVISEvaluator(dataset_name, cfg, True, output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format( + dataset_name, evaluator_type + ) + ) + elif len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + @classmethod + def test_with_TTA(cls, cfg, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. + logger.info("Running inference with test-time augmentation ...") + model = GeneralizedRCNNWithTTA(cfg, model) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res + + +def setup(args): + """ + Create configs and perform basic setups. + """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + cfg = setup(args) + + if args.eval_only: + model = Trainer.build_model(cfg) + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + """ + If you'd like to do anything fancier than the standard training logic, + consider writing your own training loop (see plain_train_net.py) or + subclassing the trainer. + """ + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) \ No newline at end of file diff --git a/val.py b/tools/val.py similarity index 100% rename from val.py rename to tools/val.py diff --git a/tools/visualize_json_result.py b/tools/visualize_json_result.py new file mode 100644 index 0000000..ad8c8ed --- /dev/null +++ b/tools/visualize_json_result.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. 
+
+import argparse
+import json
+import numpy as np
+import os
+from collections import defaultdict
+import cv2
+import tqdm
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import Boxes, BoxMode, Instances
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import setup_logger
+from detectron2.utils.visualizer import Visualizer
+
+
+def create_instances(predictions, image_size):
+    # NOTE: relies on the globals `args` and `dataset_id_map` defined under __main__.
+    ret = Instances(image_size)
+
+    score = np.asarray([x["score"] for x in predictions])
+    chosen = (score > args.conf_threshold).nonzero()[0]
+    score = score[chosen]
+    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
+    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+
+    labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])
+
+    ret.scores = score
+    ret.pred_boxes = Boxes(bbox)
+    ret.pred_classes = labels
+
+    try:
+        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
+    except KeyError:
+        pass
+    return ret
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="A script that visualizes the json predictions from COCO or LVIS dataset."
+    )
+    parser.add_argument("--input", required=True, help="JSON file produced by the model")
+    parser.add_argument("--output", required=True, help="output directory")
+    parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val")
+    parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold")
+    args = parser.parse_args()
+
+    logger = setup_logger()
+
+    with PathManager.open(args.input, "r") as f:
+        predictions = json.load(f)
+
+    pred_by_image = defaultdict(list)
+    for p in predictions:
+        pred_by_image[p["image_id"]].append(p)
+
+    dicts = list(DatasetCatalog.get(args.dataset))
+    metadata = MetadataCatalog.get(args.dataset)
+    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
+
+        def dataset_id_map(ds_id):
+            return metadata.thing_dataset_id_to_contiguous_id[ds_id]
+
+    elif "lvis" in args.dataset:
+        # LVIS results are in the same format as COCO results, but have a different
+        # mapping from dataset category id to contiguous category id in [0, #categories - 1]
+        def dataset_id_map(ds_id):
+            return ds_id - 1
+
+    else:
+        raise ValueError("Unsupported dataset: {}".format(args.dataset))
+
+    os.makedirs(args.output, exist_ok=True)
+
+    for dic in tqdm.tqdm(dicts):
+        img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
+        basename = os.path.basename(dic["file_name"])
+
+        predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
+        vis = Visualizer(img, metadata)
+        vis_pred = vis.draw_instance_predictions(predictions).get_image()
+
+        vis = Visualizer(img, metadata)
+        vis_gt = vis.draw_dataset_dict(dic).get_image()
+
+        concat = np.concatenate((vis_pred, vis_gt), axis=1)
+        cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])
\ No newline at end of file
diff --git a/train.py b/train.py
deleted file mode 100644
index fc80254..0000000
--- a/train.py
+++ /dev/null
@@ -1 +0,0 @@
-pass
\ No newline at end of file

From 1f071947546cd9ecd1a130c2c29e4f46010eda9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?=
Date: Wed, 18 Nov 2020 16:44:34 +0700
Subject: [PATCH 02/13] Adding config to train on custom dataset

---
 tools/preprocess.py | 28 ++++++++++++++++++++++++++++
 tools/train.py      |  3 +++
 tools/val.py        | 0
 3 files changed, 31 insertions(+)
 create mode 100644 tools/preprocess.py
delete mode 100644 tools/val.py diff --git a/tools/preprocess.py b/tools/preprocess.py new file mode 100644 index 0000000..ecddb1c --- /dev/null +++ b/tools/preprocess.py @@ -0,0 +1,28 @@ +from detectron2.data.datasets import register_coco_instances + + +PUBLAYNET_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "Background"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "Text"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "Title"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "List"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "Table"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "Figure"}, +] + +def _get_publaynet_instances_meta(): + thing_ids = [k["id"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] + assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous COCO category id to an id in [0, 79] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + "thing_colors": thing_colors, + } + return ret +def register_publaynet_dataset(): + register_coco_instances("publaynet_train", _get_publaynet_instances_meta(), "../datasets/publaynet/train.json", "../datasets/publaynet/train.json") + register_coco_instances("publaynet_val", _get_publaynet_instances_meta(), "../datasets/publaynet/val.json", "../datasets/publaynet/val.json") \ No newline at end of file diff --git a/tools/train.py b/tools/train.py index 238cd57..a9dd4e6 100644 --- a/tools/train.py +++ b/tools/train.py @@ -38,7 +38,9 @@ verify_results, ) from detectron2.modeling import GeneralizedRCNNWithTTA +from .preprocess import register_publaynet_dataset +register_publaynet_dataset() class Trainer(DefaultTrainer): """ @@ -156,6 +158,7 @@ def main(args): if __name__ == "__main__": + args = default_argument_parser().parse_args() print("Command Line Args:", args) launch( diff --git a/tools/val.py b/tools/val.py deleted file mode 100644 index e69de29..0000000 From 4457825d8f0aec32b74fdd8bf5d0550353c0aac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?= Date: Wed, 18 Nov 2020 16:45:39 +0700 Subject: [PATCH 03/13] Change path of model to custom datasets instead of coco dataset --- configs/Base-RCNN-FPN.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml index d40fe5e..c1b54cf 100644 --- a/configs/Base-RCNN-FPN.yaml +++ b/configs/Base-RCNN-FPN.yaml @@ -30,8 +30,10 @@ MODEL: NUM_CONV: 4 POOLER_RESOLUTION: 14 DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) + TRAIN: ('publaynet_train') + TEST: ('publaynet_train') + # TRAIN: ("coco_2017_train",) + # TEST: ("coco_2017_val",) SOLVER: IMS_PER_BATCH: 16 BASE_LR: 0.02 From 30ab5f21c6f29045ecdcdd4ed72bdb6bfbc27106 Mon Sep 17 00:00:00 2001 From: vietnamican Date: Wed, 18 Nov 2020 23:24:42 +0700 Subject: [PATCH 04/13] Adding file to train the publaynet data --- .gitignore | 3 +++ __init__.py | 0 configs/Base-RCNN-FPN.yaml | 6 ++---- configs/DLA_mask_rcnn_R_101_FPN_3x.yaml | 6 +++--- configs/faster_rcnn_R_101_FPN_3x.yaml | 18 ++++++++++++++++++ tools/__init__.py | 1 + tools/preprocess.py | 7 +++---- tools/train.py => train.py | 3 ++- 8 files changed, 32 
insertions(+), 12 deletions(-) create mode 100644 __init__.py create mode 100644 configs/faster_rcnn_R_101_FPN_3x.yaml create mode 100644 tools/__init__.py rename tools/train.py => train.py (99%) diff --git a/.gitignore b/.gitignore index e69de29..ebaeab5 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,3 @@ +datasets/ +output/ +**__pycache__** \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml index c1b54cf..d40fe5e 100644 --- a/configs/Base-RCNN-FPN.yaml +++ b/configs/Base-RCNN-FPN.yaml @@ -30,10 +30,8 @@ MODEL: NUM_CONV: 4 POOLER_RESOLUTION: 14 DATASETS: - TRAIN: ('publaynet_train') - TEST: ('publaynet_train') - # TRAIN: ("coco_2017_train",) - # TEST: ("coco_2017_val",) + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) SOLVER: IMS_PER_BATCH: 16 BASE_LR: 0.02 diff --git a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml index ba70017..d88a25a 100644 --- a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml +++ b/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml @@ -5,10 +5,10 @@ MODEL: RESNETS: DEPTH: 101 ROI_HEADS: - NUM_CLASSES: 5 + BATCH_SIZE_PER_IMAGE: 512 DATASETS: - TRAIN: ("dla_train",) - TEST: ("dla_val",) + TRAIN: ("publaynet_train",) + TEST: ("publaynet_val",) SOLVER: STEPS: (210000, 250000) MAX_ITER: 75500 diff --git a/configs/faster_rcnn_R_101_FPN_3x.yaml b/configs/faster_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000..28d7f07 --- /dev/null +++ b/configs/faster_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 512 +DATASETS: + TRAIN: ("publaynet_train",) + TEST: ("publaynet_val",) +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 75500 + IMS_PER_BATCH: 2 + BASE_LR: 0.0009 +DATALOADER: + NUM_WORKERS: 1 diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000..66824d3 --- /dev/null +++ b/tools/__init__.py @@ -0,0 +1 @@ +from .preprocess import register_publaynet_dataset \ No newline at end of file diff --git a/tools/preprocess.py b/tools/preprocess.py index ecddb1c..9dd8797 100644 --- a/tools/preprocess.py +++ b/tools/preprocess.py @@ -1,6 +1,5 @@ from detectron2.data.datasets import register_coco_instances - PUBLAYNET_CATEGORIES = [ {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "Background"}, {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "Text"}, @@ -13,7 +12,7 @@ def _get_publaynet_instances_meta(): thing_ids = [k["id"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] thing_colors = [k["color"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] - assert len(thing_ids) == 80, len(thing_ids) + assert len(thing_ids) == 6, len(thing_ids) # Mapping from the incontiguous COCO category id to an id in [0, 79] thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} thing_classes = [k["name"] for k in PUBLAYNET_CATEGORIES if k["isthing"] == 1] @@ -24,5 +23,5 @@ def _get_publaynet_instances_meta(): } return ret def register_publaynet_dataset(): - register_coco_instances("publaynet_train", _get_publaynet_instances_meta(), "../datasets/publaynet/train.json", "../datasets/publaynet/train.json") - register_coco_instances("publaynet_val", _get_publaynet_instances_meta(), "../datasets/publaynet/val.json", "../datasets/publaynet/val.json") \ No newline at end of file + 
register_coco_instances("publaynet_train", {}, "datasets/publaynet/val.json", "datasets/publaynet/val/") + register_coco_instances("publaynet_val", {}, "datasets/publaynet/val.json", "datasets/publaynet/val/") \ No newline at end of file diff --git a/tools/train.py b/train.py similarity index 99% rename from tools/train.py rename to train.py index a9dd4e6..b0538ba 100644 --- a/tools/train.py +++ b/train.py @@ -38,7 +38,8 @@ verify_results, ) from detectron2.modeling import GeneralizedRCNNWithTTA -from .preprocess import register_publaynet_dataset + +from tools import register_publaynet_dataset register_publaynet_dataset() From 0bfc3a09b82e414305b3b7e5cd5667896354acc8 Mon Sep 17 00:00:00 2001 From: vietnamican Date: Wed, 18 Nov 2020 23:38:44 +0700 Subject: [PATCH 05/13] Update README.md, add the way of organizing the directory --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 9b612eb..1a86282 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,16 @@ # Document-Layout-Analysis Tools for extract figure, table, text,... from a pdf document + +The directories should be arranged like this + + root + ├── mmdet + ├── tools + ├── configs + ├── datasets + │ ├── publaynet + │ │ ├── test + │ │ ├── train + │ │ ├── val + │ │ ├── train.json + | | ├── val.json \ No newline at end of file From 8da0ee183818762faa9ebdb19de1588565520650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:45:35 +0700 Subject: [PATCH 06/13] Add training instruction, add requirements file --- README.md | 73 +++++++++++++++++-- ...PN_3x.yaml => mask_rcnn_R_101_FPN_3x.yaml} | 0 requirements.txt | 4 + 3 files changed, 72 insertions(+), 5 deletions(-) rename configs/{DLA_mask_rcnn_R_101_FPN_3x.yaml => mask_rcnn_R_101_FPN_3x.yaml} (100%) create mode 100644 requirements.txt diff --git a/README.md b/README.md index 1a86282..78859a4 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,79 @@ # Document-Layout-Analysis Tools for extract figure, table, text,... from a pdf document +## Installation +``` +$ pip install -r requirements.txt +``` +### Install detectron2 +Requirment +- CUDA=10.1 +- PyTorch>=1.7.0 -The directories should be arranged like this +How to install CUDA 10.1 can be found here: https://developer.nvidia.com/cuda-10.1-download-archive-base + +How to install PyTorch can be found here: https://pytorch.org/ + +Afer installed above package, follow the instructions below to install Document-Layout-Analysis: +``` +$ git clone https://github.com/facebookresearch/detectron2.git +$ git checkout 8e3effc +$ python -m pip install -e detectron2 +``` +### Install Document-Layout-Analysis +Follow the instructions below: +``` +$ git clone -b dev https://github.com/Wild-Rift/Document-Layout-Analysis.git +$ cd Document-Layout-Analysis +``` + +## Train +### Dataset + +We use [IBM Publaynet](https://developer.ibm.com/technologies/artificial-intelligence/data/publaynet/) dataset for training and testing. + +It includes 358,353 images, 335,703 training images, 11,245 validation images and 11,405 test images. The category-id label mapping of this dataset is: +| Category id | Label | +| :---: | :--- | +| 1 | Text | +| 2 | Title | +| 3 | List | +| 4 | Table | +| 5 | Figure | + +After download and extract dataset, please put it in ```datasets``` directory. The directories should be arranged like this: root ├── mmdet ├── tools ├── configs + ├── output + │ ├──... 
+ │ ├── datasets │ ├── publaynet - │ │ ├── test - │ │ ├── train - │ │ ├── val + │ │ ├── test/ + │ │ ├── train/ + │ │ ├── val/ │ │ ├── train.json - | | ├── val.json \ No newline at end of file + │ │ ├── val.json + +### Training +Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN + +``` +$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model +$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' #if use Mask-RCNN model +``` +If you want to inspect model's structures, go to ```configs/``` directory + +If you want to training on 8 GPU, run: +``` +$ python train.py --num-gpus 8 --config-file CONFIG_FILE +``` +If you want to training on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), run: +``` +$ python train.py --num-gpus 1 \ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ + SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 +``` +Checkpoints of model will be store in ```output/``` directory after each epoch. diff --git a/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml b/configs/mask_rcnn_R_101_FPN_3x.yaml similarity index 100% rename from configs/DLA_mask_rcnn_R_101_FPN_3x.yaml rename to configs/mask_rcnn_R_101_FPN_3x.yaml diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c405636 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +numpy=1.9.0 +pytorch=1.7.0 +torchvision=0.8.1 +pyyaml==5.1 \ No newline at end of file From 46693a52dd45eb9271f4c93f78452d3e10509c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:50:41 +0700 Subject: [PATCH 07/13] Fix bug in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 78859a4..207b693 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ How to install CUDA 10.1 can be found here: https://developer.nvidia.com/cuda-10 How to install PyTorch can be found here: https://pytorch.org/ -Afer installed above package, follow the instructions below to install Document-Layout-Analysis: +Afer installed above package, follow the instructions below to install detectron2: ``` $ git clone https://github.com/facebookresearch/detectron2.git $ git checkout 8e3effc From 456a605c1fc6de5fc7f86826aa8fdf36b8c8b118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:51:50 +0700 Subject: [PATCH 08/13] Add training instruction, add requirements file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 207b693..64a74e5 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ $ python train.py --num-gpus 8 --config-file CONFIG_FILE If you want to training on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), run: ``` $ python train.py --num-gpus 1 \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ + --config-file CONFIG_FILE \ SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 ``` Checkpoints of model will be store in ```output/``` directory after each epoch. 
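A note on the dataset registration the patches above rely on: `tools/preprocess.py` boils down to detectron2's `register_coco_instances` helper, and `train.py` simply calls it before `launch`. The registration can be exercised on its own as a sanity check before starting a run. The sketch below is not part of the patch series; it assumes the README's directory layout (`datasets/publaynet/val.json`, `datasets/publaynet/val/`) and only uses detectron2's standard catalog API.

```
# Sanity-check sketch (not part of the patches): confirm the PubLayNet
# COCO registration resolves before launching a training run.
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances

# Same call tools/preprocess.py makes:
# (dataset name, extra metadata dict, COCO-format json, image root).
register_coco_instances(
    "publaynet_val", {}, "datasets/publaynet/val.json", "datasets/publaynet/val/"
)

dicts = DatasetCatalog.get("publaynet_val")  # parses the COCO json here
meta = MetadataCatalog.get("publaynet_val")  # class names are filled in from the json
print(len(dicts), "images; classes:", meta.thing_classes)
```

If this prints the expected image count and the PubLayNet classes (Text, Title, List, Table, Figure), the paths match what `train.py` will load.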
From d347a63bdbcdec899dce2593fcc1c282f2749560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:53:04 +0700 Subject: [PATCH 09/13] Fix bug in README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 64a74e5..ac11e0a 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,8 @@ After download and extract dataset, please put it in ```datasets``` directory. T Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN ``` -$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model -$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' #if use Mask-RCNN model +$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model +$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' # if use Mask-RCNN model ``` If you want to inspect model's structures, go to ```configs/``` directory From c7565adce07a96bf6b65700423a7f1ea54037ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 16:53:42 +0700 Subject: [PATCH 10/13] Fix bug in README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ac11e0a..58cdc7b 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,8 @@ After download and extract dataset, please put it in ```datasets``` directory. T Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN ``` -$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x' # if use Faster-RCNN model -$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x' # if use Mask-RCNN model +$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if use Faster-RCNN model +$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x.yaml' # if use Mask-RCNN model ``` If you want to inspect model's structures, go to ```configs/``` directory From 456e2f034c2e03ae52515428264d77297b61d8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?= =?UTF-8?q?KHDL=29?= Date: Fri, 20 Nov 2020 17:00:16 +0700 Subject: [PATCH 11/13] Fix bug in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 58cdc7b..356faf1 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ After download and extract dataset, please put it in ```datasets``` directory. 
T
 │   │   ├── val.json
 
 ### Training
-Document-Layout-Analysis support training on two models: Faster-RCNN và Mask-RCNN
+Document-Layout-Analysis support training on two models: Faster-RCNN and Mask-RCNN
 
 ```
 $ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if use Faster-RCNN model

From 3d676d9aac7983792edc551bd97373b6aac89dc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28HV=29?=
Date: Sat, 21 Nov 2020 20:16:35 +0700
Subject: [PATCH 12/13] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 356faf1..4328f32 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ Document-Layout-Analysis support training on two models: Faster-RCNN and Mask-RC
 $ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if use Faster-RCNN model
 $ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x.yaml' # if use Mask-RCNN model
 ```
-If you want to inspect model's structures, go to ```configs/``` directory
+If you want to inspect model's structures, go to ```configs``` directory
 
 If you want to training on 8 GPU, run:
@@ -76,4 +76,4 @@ $ python train.py --num-gpus 1 \
    --config-file CONFIG_FILE \
    SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
 ```
-Checkpoints of model will be store in ```output/``` directory after each epoch.
+Checkpoints of model will be store in ```output``` directory after each epoch.

From 153f8e882bca3ee462bf05969bfbd48c05cd468c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ph=E1=BA=A1m=20V=C4=83n=20Ti=E1=BA=BFn=20=28VNCDLL-CTPTNLV?=
 =?UTF-8?q?KHDL=29?=
Date: Wed, 25 Nov 2020 16:27:03 +0700
Subject: [PATCH 13/13] Fix conflict README.md

---
 README.md | 83 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 486f137..11b403a 100644
--- a/README.md
+++ b/README.md
@@ -1,26 +1,79 @@
-# Document Layout Analysis
-> Tools for extract figure, table, text,... from a pdf document
+# Document-Layout-Analysis
+Tools for extracting figures, tables, text, ... from a PDF document
+## Installation
+```
+$ pip install -r requirements.txt
+```
+### Install detectron2
+Requirements
+- CUDA=10.1
+- PyTorch>=1.7.0
 
+How to install CUDA 10.1 can be found here: https://developer.nvidia.com/cuda-10.1-download-archive-base
 
-## About The Project
+How to install PyTorch can be found here: https://pytorch.org/
 
-![](./image/demo.png)
+After installing the above packages, follow the instructions below to install detectron2:
+```
+$ git clone https://github.com/facebookresearch/detectron2.git
+$ git checkout 8e3effc
+$ python -m pip install -e detectron2
+```
+### Install Document-Layout-Analysis
+Follow the instructions below:
+```
+$ git clone -b dev https://github.com/Wild-Rift/Document-Layout-Analysis.git
+$ cd Document-Layout-Analysis
+```
 
-## Installation
+## Train
+### Dataset
 
-See [Installed](https://github.com/Wild-Rift/Document-Layout-Analysis/tree/staging).
+We use the [IBM PubLayNet](https://developer.ibm.com/technologies/artificial-intelligence/data/publaynet/) dataset for training and testing.
 
-## Training model
+It includes 358,353 images: 335,703 training images, 11,245 validation images, and 11,405 test images.
+The category-id label mapping of this dataset is:
+| Category id | Label |
+| :---: | :--- |
+| 1 | Text |
+| 2 | Title |
+| 3 | List |
+| 4 | Table |
+| 5 | Figure |
 
-See [Train model](https://github.com/Wild-Rift/Document-Layout-Analysis/tree/dev)
+After downloading and extracting the dataset, put it in the ```datasets``` directory. The directories should be arranged like this:
 
-## Evalutate model
+    root
+    ├── mmdet
+    ├── tools
+    ├── configs
+    ├── output
+    │   ├──...
+    │
+    ├── datasets
+    │   ├── publaynet
+    │   │   ├── test/
+    │   │   ├── train/
+    │   │   ├── val/
+    │   │   ├── train.json
+    │   │   ├── val.json
 
-See [Colab Notebook](https://colab.research.google.com/drive/1WBzVAgLdldrX6Gs1lbUaUPF63fkjcw4t?usp=sharing)
+### Training
 
-## My Team.
+Document-Layout-Analysis supports training two models: Faster-RCNN and Mask-RCNN
 
-1. [Pham Van Tien](https://github.com/vietnamican)
-2. [Nguyen Trung Duc](https://github.com/caoboiyb)
-3. [Tran Tien Quan](https://github.com/Lill98)
-4. [Bui Xuan Thoai](https://github.com/ThanThoai)
\ No newline at end of file
+```
+$ CONFIG_FILE='configs/faster_rcnn_R_101_FPN_3x.yaml' # if using the Faster-RCNN model
+$ CONFIG_FILE='configs/mask_rcnn_R_101_FPN_3x.yaml' # if using the Mask-RCNN model
+```
+If you want to inspect the model structures, see the ```configs``` directory.
 
+To train on 8 GPUs, run:
+```
+$ python train.py --num-gpus 8 --config-file CONFIG_FILE
+```
+To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677); run:
+```
+$ python train.py --num-gpus 1 \
+   --config-file CONFIG_FILE \
+   SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
+```
+Model checkpoints will be stored in the ```output``` directory periodically during training.
\ No newline at end of file
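Once training finishes, the checkpoint in `output/` can be used for inference. The patch series does not include an inference script (`tools/val.py` is empty when it appears), so the following is only a sketch: it uses detectron2's stock `DefaultPredictor`, and the checkpoint and image paths (`output/model_final.pth`, `page.png`) are assumptions rather than files the patches create.

```
# Inference sketch (not part of the patches): run a trained checkpoint
# on a single page image and print the detected layout regions.
import cv2
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file("configs/mask_rcnn_R_101_FPN_3x.yaml")  # resolves _BASE_ itself
cfg.MODEL.WEIGHTS = "output/model_final.pth"  # assumed checkpoint written by train.py
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # drop low-confidence detections

predictor = DefaultPredictor(cfg)
outputs = predictor(cv2.imread("page.png"))   # DefaultPredictor expects a BGR array
instances = outputs["instances"].to("cpu")
print(instances.pred_classes)  # contiguous category ids of the registered classes
print(instances.scores)
```

`DefaultPredictor` runs the model in evaluation mode with the config's test-time transforms, which is enough for spot-checking a checkpoint; for proper metrics, the `--eval-only` path already present in `train.py` runs `COCOEvaluator` over the validation set.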