Prechádzať zdrojové kódy

Add -force_cudnn option; support for cuDNN in waifu2x.lua/web.lua

nagadomi pred 9 rokmi
rodič
commit
af74a67bd1
3 zmenené súbory: 46 pridaní a 24 odobraní
  1. 9 0
      lib/w2nn.lua
  2. 24 15
      waifu2x.lua
  3. 13 9
      web.lua

+ 9 - 0
lib/w2nn.lua

@@ -16,6 +16,15 @@ else
    pcall(load_cunn)
    pcall(load_cunn)
    pcall(load_cudnn)
    pcall(load_cudnn)
    w2nn = {}
    w2nn = {}
+
+   -- Load a serialized model and prepare it for GPU inference.
+   -- model_path: path to a model file stored in torch's "ascii" format.
+   -- force_cudnn: when truthy, convert the model to the cuDNN backend
+   --              via cudnn.convert before moving it to the GPU.
+   -- Returns the model after calling :cuda() and :evaluate() on it.
+   -- Raises an error if the file cannot be loaded, or if force_cudnn is
+   -- requested while the cudnn package is not available.
+   function w2nn.load_model(model_path, force_cudnn)
+      local model = torch.load(model_path, "ascii")
+      if force_cudnn then
+         -- load_cudnn is invoked through pcall, so a failed load is silent
+         -- and may leave cudnn unset; fail with a clear message instead of
+         -- an opaque "attempt to index a nil value".
+         if not cudnn then
+            error("-force_cudnn was requested but the cudnn package is not loaded")
+         end
+         model = cudnn.convert(model, cudnn)
+      end
+      model:cuda():evaluate()
+      return model
+   end
    require 'LeakyReLU'
    require 'LeakyReLU'
    require 'LeakyReLU_deprecated'
    require 'LeakyReLU_deprecated'
    require 'DepthExpand2x'
    require 'DepthExpand2x'

+ 24 - 15
waifu2x.lua

@@ -59,7 +59,7 @@ local function convert_image(opt)
    opt.o = format_output(opt, opt.i)
    opt.o = format_output(opt, opt.i)
    if opt.m == "noise" then
    if opt.m == "noise" then
       local model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
       local model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
-      local model = torch.load(model_path, "ascii")
+      local model = w2nn.load_model(model_path, opt.force_cudnn)
       if not model then
       if not model then
 	 error("Load Error: " .. model_path)
 	 error("Load Error: " .. model_path)
       end
       end
@@ -69,7 +69,7 @@ local function convert_image(opt)
       print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
       print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
    elseif opt.m == "scale" then
    elseif opt.m == "scale" then
       local model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
       local model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-      local model = torch.load(model_path, "ascii")
+      local model = w2nn.load_model(model_path, opt.force_cudnn)
       if not model then
       if not model then
 	 error("Load Error: " .. model_path)
 	 error("Load Error: " .. model_path)
       end
       end
@@ -82,8 +82,8 @@ local function convert_image(opt)
       local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
       local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
       if path.exists(model_path) then
       if path.exists(model_path) then
 	 local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
 	 local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-	 local t, scale_model = pcall(torch.load, scale_model_path, "ascii")
-	 local model = torch.load(model_path, "ascii")
+	 -- The standalone scale model is optional: pcall lets the code below fall
+	 -- back to the joint model when loading fails. The callee must be
+	 -- w2nn.load_model; a bare `load_model` is nil, which makes pcall fail
+	 -- unconditionally and the scale model is never loaded.
+	 local t, scale_model = pcall(w2nn.load_model, scale_model_path, opt.force_cudnn)
+	 local model = w2nn.load_model(model_path, opt.force_cudnn)
 	 if not t then
 	 if not t then
 	    scale_model = model
 	    scale_model = model
 	 end
 	 end
@@ -94,9 +94,9 @@ local function convert_image(opt)
 	 print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
 	 print(opt.o .. ": " .. (sys.clock() - t) .. " sec")
       else
       else
 	 local noise_model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
 	 local noise_model_path = path.join(opt.model_dir, ("noise%d_model.t7"):format(opt.noise_level))
-	 local noise_model = torch.load(noise_model_path, "ascii")
+	 local noise_model = w2nn.load_model(noise_model_path, opt.force_cudnn)
 	 local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
 	 local scale_model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-	 local scale_model = torch.load(scale_model_path, "ascii")
+	 local scale_model = w2nn.load_model(scale_model_path, opt.force_cudnn)
 	 local t = sys.clock()
 	 local t = sys.clock()
 	 x = alpha_util.make_border(x, alpha, reconstruct.offset_size(scale_model))
 	 x = alpha_util.make_border(x, alpha, reconstruct.offset_size(scale_model))
 	 x = image_f(noise_model, x, opt.crop_size, opt.batch_size)
 	 x = image_f(noise_model, x, opt.crop_size, opt.batch_size)
@@ -129,24 +129,24 @@ local function convert_frames(opt)
    end
    end
    if opt.m == "scale" then
    if opt.m == "scale" then
       model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
       model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-      scale_model = torch.load(model_path, "ascii")
+      scale_model = w2nn.load_model(model_path, opt.force_cudnn)
    elseif opt.m == "noise" then
    elseif opt.m == "noise" then
       model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
       model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
-      noise_model[opt.noise_level] = torch.load(model_path, "ascii")
+      noise_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
    elseif opt.m == "noise_scale" then
    elseif opt.m == "noise_scale" then
       local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
       local model_path = path.join(opt.model_dir, ("noise%d_scale%.1fx_model.t7"):format(opt.noise_level, opt.scale))
       if path.exists(model_path) then
       if path.exists(model_path) then
-	 noise_scale_model[opt.noise_level] = torch.load(model_path, "ascii")
+	 noise_scale_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
 	 model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
 	 model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-	 t, scale_model = pcall(torch.load, model_path, "ascii")
+	 -- Loading the standalone scale model is best-effort (fallback follows).
+	 -- The callee must be w2nn.load_model; a bare `load_model` is nil, which
+	 -- makes pcall fail unconditionally and always triggers the fallback.
+	 t, scale_model = pcall(w2nn.load_model, model_path, opt.force_cudnn)
 	 if not t then
 	 if not t then
 	    scale_model = noise_scale_model[opt.noise_level]
 	    scale_model = noise_scale_model[opt.noise_level]
 	 end
 	 end
       else
       else
 	 model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
 	 model_path = path.join(opt.model_dir, ("scale%.1fx_model.t7"):format(opt.scale))
-	 scale_model = torch.load(model_path, "ascii")
+	 scale_model = w2nn.load_model(model_path, opt.force_cudnn)
 	 model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
 	 model_path = path.join(opt.model_dir, string.format("noise%d_model.t7", opt.noise_level))
-	 noise_model[opt.noise_level] = torch.load(model_path, "ascii")
+	 noise_model[opt.noise_level] = w2nn.load_model(model_path, opt.force_cudnn)
       end
       end
    end
    end
    local fp = io.open(opt.l)
    local fp = io.open(opt.l)
@@ -214,16 +214,25 @@ local function waifu2x()
    cmd:option("-thread", -1, "number of CPU threads")
    cmd:option("-thread", -1, "number of CPU threads")
    cmd:option("-tta", 0, '8x slower and slightly high quality (0|1)')
    cmd:option("-tta", 0, '8x slower and slightly high quality (0|1)')
    cmd:option("-tta_level", 8, 'TTA level (2|4|8)')
    cmd:option("-tta_level", 8, 'TTA level (2|4|8)')
-   
+   cmd:option("-force_cudnn", 0, 'use cuDNN backend (0|1)')
+
    local opt = cmd:parse(arg)
    local opt = cmd:parse(arg)
    if opt.thread > 0 then
    if opt.thread > 0 then
       torch.setnumthreads(opt.thread)
       torch.setnumthreads(opt.thread)
    end
    end
    if cudnn then
    if cudnn then
       cudnn.fastest = true
       cudnn.fastest = true
-      cudnn.benchmark = false
+      if opt.l:len() > 0 then
+	 cudnn.benchmark = true -- find fastest algo
+      else
+	 cudnn.benchmark = false
+      end
+   end
+   if opt.force_cudnn == 1 then
+      opt.force_cudnn = true
+   else
+      opt.force_cudnn = false
    end
    end
-   
    if string.len(opt.l) == 0 then
    if string.len(opt.l) == 0 then
       convert_image(opt)
       convert_image(opt)
    else
    else

+ 13 - 9
web.lua

@@ -28,6 +28,8 @@ cmd:option("-gpu", 1, 'Device ID')
 cmd:option("-crop_size", 128, 'patch size per process')
 cmd:option("-crop_size", 128, 'patch size per process')
 cmd:option("-batch_size", 1, 'batch size')
 cmd:option("-batch_size", 1, 'batch size')
 cmd:option("-thread", -1, 'number of CPU threads')
 cmd:option("-thread", -1, 'number of CPU threads')
+cmd:option("-force_cudnn", 0, 'use cuDNN backend (0|1)')
+
 local opt = cmd:parse(arg)
 local opt = cmd:parse(arg)
 cutorch.setDevice(opt.gpu)
 cutorch.setDevice(opt.gpu)
 torch.setdefaulttensortype('torch.FloatTensor')
 torch.setdefaulttensortype('torch.FloatTensor')
@@ -36,18 +38,20 @@ if opt.thread > 0 then
 end
 end
 if cudnn then
 if cudnn then
    cudnn.fastest = true
    cudnn.fastest = true
-   cudnn.benchmark = false
+   cudnn.benchmark = true
 end
 end
+opt.force_cudnn = opt.force_cudnn == 1
 local ART_MODEL_DIR = path.join(ROOT, "models", "upconv_7", "art")
 local ART_MODEL_DIR = path.join(ROOT, "models", "upconv_7", "art")
 local PHOTO_MODEL_DIR = path.join(ROOT, "models", "photo")
 local PHOTO_MODEL_DIR = path.join(ROOT, "models", "photo")
-local art_scale2_model = torch.load(path.join(ART_MODEL_DIR, "scale2.0x_model.t7"), "ascii")
-local art_noise1_model = torch.load(path.join(ART_MODEL_DIR, "noise1_model.t7"), "ascii")
-local art_noise2_model = torch.load(path.join(ART_MODEL_DIR, "noise2_model.t7"), "ascii")
-local art_noise3_model = torch.load(path.join(ART_MODEL_DIR, "noise3_model.t7"), "ascii")
-local photo_scale2_model = torch.load(path.join(PHOTO_MODEL_DIR, "scale2.0x_model.t7"), "ascii")
-local photo_noise1_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise1_model.t7"), "ascii")
-local photo_noise2_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise2_model.t7"), "ascii")
-local photo_noise3_model = torch.load(path.join(PHOTO_MODEL_DIR, "noise3_model.t7"), "ascii")
+local art_scale2_model = w2nn.load_model(path.join(ART_MODEL_DIR, "scale2.0x_model.t7"), opt.force_cudnn)
+local art_noise1_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise1_model.t7"), opt.force_cudnn)
+local art_noise2_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise2_model.t7"), opt.force_cudnn)
+local art_noise3_model = w2nn.load_model(path.join(ART_MODEL_DIR, "noise3_model.t7"), opt.force_cudnn)
+local photo_scale2_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "scale2.0x_model.t7"), opt.force_cudnn)
+local photo_noise1_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise1_model.t7"), opt.force_cudnn)
+local photo_noise2_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise2_model.t7"), opt.force_cudnn)
+local photo_noise3_model = w2nn.load_model(path.join(PHOTO_MODEL_DIR, "noise3_model.t7"), opt.force_cudnn)
+collectgarbage()
 local CLEANUP_MODEL = false -- if you are using the low memory GPU, you could use this flag.
 local CLEANUP_MODEL = false -- if you are using the low memory GPU, you could use this flag.
 local CACHE_DIR = path.join(ROOT, "cache")
 local CACHE_DIR = path.join(ROOT, "cache")
 local MAX_NOISE_IMAGE = 2560 * 2560
 local MAX_NOISE_IMAGE = 2560 * 2560