nagadomi 8 лет назад
Родитель
Сommit
6f437e4014
2 измененных файлов с 98 добавлено и 31 удалено
  1. 64 31
      appendix/benchmark.md
  2. 34 0
      appendix/benchmark.sh

+ 64 - 31
appendix/benchmark.md

@@ -1,45 +1,78 @@
-# Benchmark results
+# Benchmarks
 
-Warning: This benchmark results is outdated. I will update soon.
+## Photo
 
-## Usage
+Note: waifu2x's photo models were trained on [kou's photo collection](http://photosku.com/photo/category/%E6%92%AE%E5%BD%B1%E8%80%85/kou/).
+Note: PSNR in this benchmark uses a MATLAB rgb2ycbcr-compatible function (dynamic range [16, 235], not [0, 255]) to convert images to grayscale. I think this is not the correct PSNR, but many papers use this metric.
 
-```
-th tools/benchmark.lua -dir path/to/dataset_dir -method scale -color y -model1_dir path/to/model_dir
-```
+command: 
+`th tools/benchmark.lua -dir <dataset_dir> -model1_dir <model_dir> -method scale -filter Catrom -color y -range_bug 1 -tta <0|1> -force_cudnn 1`
 
-## Dataset
+### Datasets
 
-    photo_test: 300 various photos.
-    art_test  : 90 artworks (PNG only).
+BSD100: https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/segbench/ (100 test images in BSD300)
+Urban100: https://github.com/jbhuang0604/SelfExSR
 
-## 2x upscaling model
+### 2x - PSNR 
 
-| Dataset/Model | anime\_style\_art(Y) | anime\_style\_art\_rgb | photo   | ukbench|
-|---------------|----------------------|------------------------|---------|--------|
-| photo\_test   |                29.83 |                  29.81 |**29.89**|  29.86 |
-| art\_test     |                36.02 |               **36.24**|  34.92  |  34.85 |
+| Dataset/Model | Bicubic       | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|---------------|------------------|------------------|
+| BSD100        | 29.558        | 31.427        | 31.640           | 31.749           |
+| Urban100      | 26.852        | 30.057        | 30.477           | 30.759           |
 
-The evaluation metric is PSNR(Y only), higher is better.
+### 2x with TTA - PSNR 
 
-## Denosing level 1 model
+Note: TTA is an ensemble technique that is supported by waifu2x. This method is 8x slower than the non-TTA method, but it improves PSNR (~+0.1 on photo, ~+0.4 on art).
 
-| Dataset/Model            | anime\_style\_art | anime\_style\_art\_rgb | photo   |
-|--------------------------|-------------------|------------------------|---------|
-| photo\_test Quality 80   |             36.07 |               **36.20**|   36.01 |
-| photo\_test Quality 50,45|             31.72 |                 32.01  |**32.31**|
-| art\_test Quality 80     |             40.39 |               **42.48**|   40.35 |
-| art\_test Quality 50,45  |             35.45 |               **36.70**|   36.27 |
+| Dataset/Model | Bicubic       | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|---------------|------------------|------------------|
+| BSD100        | 29.558        | 31.474        | 31.705           | 31.812           |
+| Urban100      | 26.852        | 30.140        | 30.599           | 30.868           |
 
-The evaluation metric is PSNR(RGB), higher is better.
+### 2x - benchmark elapsed time (sec)
 
-## Denosing level 2 model
+| Dataset/Model | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|------------------|------------------|
+| BSD100        | 4.057         | 2.509            | 4.947            |
+| Urban100      | 16.349        | 7.083            | 14.178           |
 
-| Dataset/Model            | anime\_style\_art | anime\_style\_art\_rgb | photo   |
-|--------------------------|-------------------|------------------------|---------|
-| photo\_test Quality 80   |             34.03 |                  34.42 |**36.06**|
-| photo\_test Quality 50,45|             31.95 |                  32.31 |**32.42**|
-| art\_test Quality 80     |             39.20 |               **41.12**|   40.48 |
-| art\_test Quality 50,45  |             36.14 |               **37.78**|   36.55 |
+### 2x with TTA - benchmark elapsed time (sec)
+
+| Dataset/Model | vgg\_7/photo  | upconv\_7/photo  | upconv\_7l/photo | 
+|---------------|---------------|------------------|------------------|
+| BSD100        | 36.611        | 20.219           | 42.486           |
+| Urban100      | 132.416       | 65.125           | 129.916          |
+
+## Art
+
+command: 
+`th tools/benchmark.lua -dir <dataset_dir> -model1_dir <model_dir> -method scale -filter Lanczos -color y -range_bug 1 -tta <0|1> -force_cudnn 1`
+
+### Dataset
+
+art_test: This dataset contains 85 assorted fan-art images. Sorry, this dataset is private.
+
+### 2x - PSNR 
+
+| Dataset/Model | Bicubic       | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|---------------|-------------|----------------|----------------|
+| art_test      | 31.022        | 37.495      | 38.330         | 39.140         |
+
+### 2x with TTA - PSNR 
+
+| Dataset/Model | Bicubic       | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|---------------|-------------|----------------|----------------|
+| art_test      | 31.022        | 37.777      | 38.677         | 39.510         |
+
+### 2x - benchmark elapsed time (sec)
+
+| Dataset/Model | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|-------------|----------------|----------------|
+| art_test      | 20.681      | 7.683          | 17.667         |
+
+### 2x with TTA - benchmark elapsed time (sec)
+
+| Dataset/Model | vgg\_7/art  | upconv\_7/art  | upconv\_7l/art | 
+|---------------|-------------|----------------|----------------|
+| art_test      | 174.674     | 77.716         | 163.932        |
 
-The evaluation metric is PSNR(RGB), higher is better.

+ 34 - 0
appendix/benchmark.sh

@@ -0,0 +1,34 @@
+#!/bin/sh
+set -x
+
+benchmark_photo() {
+    dir=./benchmarks/${1}/${2}/${3}
+    mkdir -p ${dir}
+    th tools/benchmark.lua -dir data/${1} -model1\_dir models/${2}/photo -method scale -filter Catrom -color y -range\_bug 1 -tta ${3} -force_cudnn 1 -output_dir ${dir} -save_info 1 -show_progress 0 
+}
+run_benchmark_photo() {
+    for tta in 0 1
+    do
+	for dataset in bsd100 urban100
+	do
+	    benchmark_photo ${dataset} vgg_7 ${tta}
+	    benchmark_photo ${dataset} upconv_7 ${tta}
+	    benchmark_photo ${dataset} upconv_7l ${tta}
+	done
+    done
+}
+benchmark_art() {
+    dir=./benchmarks/${1}/${2}/${3}
+    mkdir -p ${dir}
+    th tools/benchmark.lua -dir data/${1} -model1\_dir models/${2}/art -method scale -filter Lanczos -color y -range\_bug 1 -tta ${3} -force_cudnn 1 -output_dir ${dir} -save_info 1 -show_progress 0 
+}
+run_benchmark_art() {
+    for tta in 0 1
+    do
+	benchmark_art art_test vgg_7 ${tta}
+	benchmark_art art_test upconv_7 ${tta}
+	benchmark_art art_test upconv_7l ${tta}
+    done
+}
+#run_benchmark_photo
+run_benchmark_art