ports/caffe2/msvc-fixes.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8054d98..35934f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -56,6 +56,14 @@ option(USE_ROCKSDB "Use RocksDB" ON)
 option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
 option(USE_THREADS "Use Threads" ON)
 option(USE_ZMQ "Use ZMQ" OFF)
+if(MSVC)
+  if(BUILD_SHARED_LIBS)  # shared-library builds default to the dynamic runtime (/MD)
+    set(USE_STATIC_RUNTIME_DEFAULT OFF)
+  else()  # static-library builds default to the static runtime (/MT)
+    set(USE_STATIC_RUNTIME_DEFAULT ON)
+  endif()
+  option(USE_STATIC_RUNTIME "Link to the static runtime (/MT) instead of dynamic (/MD)" ${USE_STATIC_RUNTIME_DEFAULT})
+endif()
 
 # External projects
 include(ExternalProject)
@@ -99,7 +107,7 @@ else()
   foreach(flag_var
       CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
       CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
-    if (NOT ${BUILD_SHARED_LIBS})
+    if (USE_STATIC_RUNTIME)
       if(${flag_var} MATCHES "/MD")
         string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
       endif(${flag_var} MATCHES "/MD")
diff --git a/caffe2/core/logging.cc b/caffe2/core/logging.cc
index 1b4468e..1379f3a 100644
--- a/caffe2/core/logging.cc
+++ b/caffe2/core/logging.cc
@@ -117,7 +117,7 @@ using fLB::FLAGS_logtostderr;
 
 #endif // CAFFE2_USE_GFLAGS
 
-CAFFE2_DEFINE_int(caffe2_log_level, google::ERROR,
+CAFFE2_DEFINE_int(caffe2_log_level, google::GLOG_ERROR,
                   "The minimum log level that caffe2 will output.");
 
 // Google glog's api does not have an external function that allows one to check
@@ -134,15 +134,23 @@ bool IsGoogleLoggingInitialized();
 namespace caffe2 {
 bool InitCaffeLogging(int* argc, char** argv) {
   if (*argc == 0) return true;
-  if (!::google::glog_internal_namespace_::IsGoogleLoggingInitialized()) {
+#if !(defined(_MSC_VER) && defined(GLOG_IS_A_DLL))
+  // IsGoogleLoggingInitialized is not exported from the glog DLL
+  // so we can't call it. If our program calls InitGoogleLogging twice glog will
+  // abort it.
+  if (!::google::glog_internal_namespace_::IsGoogleLoggingInitialized())
+#endif
+  {
     ::google::InitGoogleLogging(argv[0]);
+#if !defined(_MSC_VER)
     ::google::InstallFailureSignalHandler();
+#endif
   }
   // If caffe2_log_level is set and is lower than the min log level by glog,
   // we will transfer the caffe2_log_level setting to glog to override that.
   FLAGS_minloglevel = std::min(FLAGS_caffe2_log_level, FLAGS_minloglevel);
   // If caffe2_log_level is explicitly set, let's also turn on logtostderr.
-  if (FLAGS_caffe2_log_level < google::ERROR) {
+  if (FLAGS_caffe2_log_level < google::GLOG_ERROR) {
     FLAGS_logtostderr = 1;
   }
   // Also, transfer the caffe2_log_level verbose setting to glog.
@@ -154,7 +162,7 @@ bool InitCaffeLogging(int* argc, char** argv) {
 
 void ShowLogInfoToStderr() {
   FLAGS_logtostderr = 1;
-  FLAGS_minloglevel = std::min(FLAGS_minloglevel, google::INFO);
+  FLAGS_minloglevel = std::min(FLAGS_minloglevel, google::GLOG_INFO);
 }
 }  // namespace caffe2
 
diff --git a/caffe2/core/logging_is_google_glog.h b/caffe2/core/logging_is_google_glog.h
index 7dd2b4f..2df4435 100644
--- a/caffe2/core/logging_is_google_glog.h
+++ b/caffe2/core/logging_is_google_glog.h
@@ -8,7 +8,7 @@
 // it. Some mobile platforms do not like stl_logging, so we add an
 // overload in that case as well.
 
-#if !defined(__CUDACC__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
+#if !defined(__CUDARCH__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
 #include <glog/stl_logging.h>
 #else // !defined(__CUDACC__) && !!defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
 
diff --git a/caffe2/image/image_input_op.h b/caffe2/image/image_input_op.h
index 9604e98..7d90014 100644
--- a/caffe2/image/image_input_op.h
+++ b/caffe2/image/image_input_op.h
@@ -214,13 +214,13 @@ ImageInputOp<Context>::ImageInputOp(
 
   // hard-coded PCA eigenvectors and eigenvalues, based on RBG channel order
   color_lighting_eigvecs_.push_back(
-    std::vector<float>{-144.7125, 183.396, 102.2295});
+    std::vector<float>{-144.7125f, 183.396f, 102.2295f});
   color_lighting_eigvecs_.push_back(
-    std::vector<float>{-148.104, -1.1475, -207.57});
+    std::vector<float>{-148.104f, -1.1475f, -207.57f});
   color_lighting_eigvecs_.push_back(
-    std::vector<float>{-148.818, -177.174, 107.1765});
+    std::vector<float>{-148.818f, -177.174f, 107.1765f});
 
-  color_lighting_eigvals_ = std::vector<float>{0.2175, 0.0188, 0.0045};
+  color_lighting_eigvals_ = std::vector<float>{0.2175f, 0.0188f, 0.0045f};
 
   CAFFE_ENFORCE_GT(batch_size_, 0, "Batch size should be nonnegative.");
   if (use_caffe_datum_) {
diff --git a/caffe2/operators/batch_matmul_op.cc b/caffe2/operators/batch_matmul_op.cc
index c2e578d..28cf030 100644
--- a/caffe2/operators/batch_matmul_op.cc
+++ b/caffe2/operators/batch_matmul_op.cc
@@ -34,7 +34,7 @@ size (C x K x N) where C is the batch size and i ranges from 0 to C-1.
         b_dim1 = in[1].dims(2);
       }
       return vector<TensorShape> {
-          CreateTensorShape(vector<int> {
+          CreateTensorShape(vector<TIndex> {
               in[0].dims(0), a_dim0, b_dim1},
               in[0].data_type())
       };
diff --git a/caffe2/operators/layer_norm_op.cu b/caffe2/operators/layer_norm_op.cu
index df13fc3..68bbc97 100644
--- a/caffe2/operators/layer_norm_op.cu
+++ b/caffe2/operators/layer_norm_op.cu
@@ -252,8 +252,8 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
   auto* ginput = Output(0);
 
   const auto canonical_axis = norm_inputs.canonical_axis_index(axis_);
-  const int left = norm_inputs.size_to_dim(canonical_axis);
-  const int right = norm_inputs.size_from_dim(canonical_axis);
+  const TIndex left = norm_inputs.size_to_dim(canonical_axis);
+  const TIndex right = norm_inputs.size_from_dim(canonical_axis);
 
   ginput->ResizeLike(norm_inputs);
   std::vector<TIndex> stats_dims(
@@ -261,7 +261,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
   stats_dims.push_back(1);
   dmean_.Resize(stats_dims);
   dstdev_.Resize(stats_dims);
-  gscratch_.Resize(std::vector<size_t>{left, right});
+  gscratch_.Resize(std::vector<TIndex>{left, right});
 
   std::vector<int> segs(left + 1);
   std::iota(segs.begin(), segs.end(), 0);
@@ -291,7 +291,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
       dout.data<float>(),
       gscratch_.mutable_data<float>());
 
-  dstdev_.Resize(vector<size_t>{left, 1});
+  dstdev_.Resize(vector<TIndex>{left, 1});
   // dstdev = reduce(temp1)
   allocScratchAndReduce(
       gscratch_.data<float>(),
diff --git a/caffe2/operators/lengths_top_k_op.cc b/caffe2/operators/lengths_top_k_op.cc
index c871d53..bad741a 100644
--- a/caffe2/operators/lengths_top_k_op.cc
+++ b/caffe2/operators/lengths_top_k_op.cc
@@ -14,7 +14,7 @@ bool LengthsTopKOp<T, Context>::RunOnDevice() {
 
   output_topk_values->Resize(N * k_);
   output_topk_indices->Resize(N * k_);
-  std::vector<int> output_dims = std::vector<int>({N, k_});
+  std::vector<TIndex> output_dims = std::vector<TIndex>({N, k_});
   output_topk_values->Reshape(output_dims);
   output_topk_indices->Reshape(output_dims);
   T* output_topk_values_data = output_topk_values->template mutable_data<T>();
diff --git a/caffe2/operators/pool_op_cudnn.cu b/caffe2/operators/pool_op_cudnn.cu
index a380d8d..b0cd326 100644
--- a/caffe2/operators/pool_op_cudnn.cu
+++ b/caffe2/operators/pool_op_cudnn.cu
@@ -467,6 +467,15 @@ class CuDNNPoolGradientOp : public ConvPoolOpBase<CUDAContext> {
   cudnnPoolingDescriptor_t pooling_desc_;
   cudnnPoolingMode_t mode_;
 
+// MSVC defines IN and OUT in minwindef.h
+#ifdef IN
+#undef IN
+#endif
+
+#ifdef OUT
+#undef OUT
+#endif
+
   // Input: X, Y, dY
   // Output: dX
   INPUT_TAGS(IN, OUT, OUT_GRAD);
diff --git a/caffe2/operators/recurrent_op_cudnn.cc b/caffe2/operators/recurrent_op_cudnn.cc
index 7777813..58bc8c3 100644
--- a/caffe2/operators/recurrent_op_cudnn.cc
+++ b/caffe2/operators/recurrent_op_cudnn.cc
@@ -115,10 +115,11 @@ void RecurrentBaseOp<T>::initialize(
 
   // RNN setup
   {
-    CUDNN_ENFORCE(cudnnSetRNNDescriptor(
+// Keep the #if/#else outside of the CUDNN_ENFORCE(...) argument list:
+// preprocessor directives inside macro arguments cause expansion errors.
 #if CUDNN_MAJOR >= 7
+    CUDNN_ENFORCE(cudnnSetRNNDescriptor(
         cudnn_wrapper_.inline_cudnn_handle(),
-#endif
         rnnDesc_,
         hiddenSize,
         numLayers,
@@ -126,10 +127,19 @@ void RecurrentBaseOp<T>::initialize(
         rnnInput,
         rnnDirection,
         rnnMode,
-#if CUDNN_MAJOR >= 7
         CUDNN_RNN_ALGO_STANDARD, // TODO: verify correctness / efficiency.
-#endif
         cudnnTypeWrapper<T>::type));
+#else
+    CUDNN_ENFORCE(cudnnSetRNNDescriptor(
+        rnnDesc_,
+        hiddenSize,
+        numLayers,
+        dropoutDesc_,
+        rnnInput,
+        rnnDirection,
+        rnnMode,
+        cudnnTypeWrapper<T>::type));
+#endif  // CUDNN_MAJOR >= 7
   }
   // X setup
   {
diff --git a/caffe2/utils/CMakeLists.txt b/caffe2/utils/CMakeLists.txt
index f90af5a..919a638 100644
--- a/caffe2/utils/CMakeLists.txt
+++ b/caffe2/utils/CMakeLists.txt
@@ -27,6 +27,10 @@ exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${Caffe2_GPU_SRCS})
 # will directly link nnpack pthreadpool.
 file(GLOB_RECURSE tmp pthreadpool*)
 exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})
+if(MSVC)
+  file(GLOB_RECURSE tmp *ThreadPool.cc)
+  exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})
+endif()
 
 # ---[ GPU test files
 file(GLOB_RECURSE tmp *_gpu_test.cc)
diff --git a/caffe2/utils/GpuBitonicSort.cuh b/caffe2/utils/GpuBitonicSort.cuh
index f52bb50..a199bcb 100644
--- a/caffe2/utils/GpuBitonicSort.cuh
+++ b/caffe2/utils/GpuBitonicSort.cuh
@@ -39,9 +39,11 @@ __device__ inline void bitonicSort(K* keys,
   // Assume the sort is taking place in shared memory
   // static_assert(Power2SortSize * (sizeof(K) + sizeof(V)) < 32768,
   //               "sort data too large (>32768 bytes)");
-  static_assert(math::integerIsPowerOf2(Power2SortSize),
+  static_assert(math::integerIsPowerOf2(
+                std::integral_constant<int, Power2SortSize>::value),
                 "sort size must be power of 2");
-  static_assert(math::integerIsPowerOf2(ThreadsPerBlock),
+  static_assert(math::integerIsPowerOf2(
+                std::integral_constant<int, ThreadsPerBlock>::value),
                 "threads in block must be power of 2");
 
   // If what we are sorting is too small, then not all threads
@@ -107,7 +109,8 @@ __device__ inline void warpBitonicSort(K* keys,
   // Smaller sorts should use a warp shuffle sort
   static_assert(Power2SortSize > kWarpSize,
                 "sort not large enough");
-  static_assert(math::integerIsPowerOf2(Power2SortSize),
+  static_assert(math::integerIsPowerOf2(
+                std::integral_constant<int, Power2SortSize>::value),
                 "sort size must be power of 2");
   static_assert(Power2SortSize <= kMaxBitonicSortSize,
                 "sort size <= 4096 only supported");
diff --git a/caffe2/utils/math.h b/caffe2/utils/math.h
index 6c352dc..a1eda9d 100644
--- a/caffe2/utils/math.h
+++ b/caffe2/utils/math.h
@@ -426,18 +426,10 @@ constexpr T roundUp(T a, T b) {
   return divUp<T>(a, b) * b;
 }
 
-// Returns true if the given integer type is a power-of-2 (positive only)
-// Note(jiayq): windows reported an error per
-//     https://github.com/caffe2/caffe2/issues/997
-// and as a result will make it a macro.
-#ifdef _MSC_VER
-#define integerIsPowerOf2(v) ((v) && !((v) & ((v) - 1)))
-#else // _MSC_VER
 template <typename T>
 constexpr bool integerIsPowerOf2(T v) {
   return (v && !(v & (v - 1)));
 }
-#endif // _MSC_VER
 
 // Returns log2(n) for a positive integer type
 template <typename T>
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
index df70e2f..ca1b123 100644
--- a/cmake/Cuda.cmake
+++ b/cmake/Cuda.cmake
@@ -37,6 +37,10 @@ function(caffe2_detect_installed_gpus out_variable)
                     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
 
     if(__nvcc_res EQUAL 0)
+      # nvcc outputs text containing line breaks when building with MSVC.
+      # Keep only the first "<major>.<minor>" token (dot matched literally)
+      # so CMake does not store a value with line breaks in the cache.
+      string(REGEX MATCH "([1-9]\\.[0-9])" __nvcc_out "${__nvcc_out}")
       string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
       set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from caffe_detect_gpus tool" FORCE)
     endif()
@@ -249,7 +253,7 @@ endif()
 # Debug and Release symbol support
 if (MSVC)
   if (${CMAKE_BUILD_TYPE} MATCHES "Release")
-    if (${BUILD_SHARED_LIBS})
+    if (NOT USE_STATIC_RUNTIME)
       list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MD")
     else()
       list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MT")
@@ -259,7 +263,7 @@ if (MSVC)
             "Caffe2 currently does not support the combination of MSVC, Cuda "
             "and Debug mode. Either set USE_CUDA=OFF or set the build type "
             "to Release")
-    if (${BUILD_SHARED_LIBS})
+    if (NOT USE_STATIC_RUNTIME)
       list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MDd")
     else()
       list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MTd")
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index afb6b68..bdad8b6 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -182,6 +182,15 @@ if(USE_OPENCV)
     message(WARNING "Not compiling with OpenCV. Suppress this warning with -DUSE_OPENCV=OFF")
     set(USE_OPENCV OFF)
   endif()
+  if(USE_OPENCV AND VCPKG_TARGET_TRIPLET MATCHES static)
+    find_package(LibLZMA QUIET)
+    if(LIBLZMA_FOUND)
+      list(APPEND Caffe2_DEPENDENCY_LIBS ${LIBLZMA_LIBRARIES})
+    else()
+      message(WARNING "Not compiling with OpenCV. Could not find liblzma. Suppress this warning with -DUSE_OPENCV=OFF")
+      set(USE_OPENCV OFF)
+    endif()
+  endif()
 endif()
 
 # ---[ FFMPEG
diff --git a/cmake/Modules/FindGlog.cmake b/cmake/Modules/FindGlog.cmake
index 1167532..9780ba5 100644
--- a/cmake/Modules/FindGlog.cmake
+++ b/cmake/Modules/FindGlog.cmake
@@ -27,6 +27,10 @@ if(MSVC)
     endif()
     if(TARGET ${GLOG_LIBRARY})
       get_target_property(GLOG_INCLUDE_DIR ${GLOG_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES)
+      get_target_property(GLOG_TYPE ${GLOG_LIBRARY} TYPE)
+      if("${GLOG_TYPE}" STREQUAL "SHARED_LIBRARY")
+        add_definitions(-DGLOG_IS_A_DLL=1)
+      endif()
     endif()
 else()
     find_library(GLOG_LIBRARY glog
diff --git a/cmake/ProtoBuf.cmake b/cmake/ProtoBuf.cmake
index 89975c8..e37d6da 100644
--- a/cmake/ProtoBuf.cmake
+++ b/cmake/ProtoBuf.cmake
@@ -13,6 +13,9 @@ function(custom_protobuf_find)
     # so we turn it off here.
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations" PARENT_SCOPE)
   endif()
+  if(MSVC)  # cache entry: a plain variable may be discarded by the subproject's option() (CMP0077 OLD)
+    set(protobuf_MSVC_STATIC_RUNTIME "${USE_STATIC_RUNTIME}" CACHE BOOL "Link protobuf to the static MSVC runtime" FORCE)
+  endif()
   add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/protobuf/cmake)
   caffe2_include_directories(${PROJECT_SOURCE_DIR}/third_party/protobuf/src)
   list(APPEND Caffe2_DEPENDENCY_LIBS libprotobuf)
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index b7423a6..2996484 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -19,6 +19,13 @@ function (Caffe2_print_configuration_summary)
   message(STATUS "  System                : ${CMAKE_SYSTEM_NAME}")
   message(STATUS "  C++ compiler          : ${CMAKE_CXX_COMPILER}")
   message(STATUS "  C++ compiler version  : ${CMAKE_CXX_COMPILER_VERSION}")
+  if(MSVC)
+    if(USE_STATIC_RUNTIME)
+      message(STATUS "  Runtime               : static (/MT)")
+    else()
+      message(STATUS "  Runtime               : dynamic (/MD)")
+    endif()
+  endif()
   message(STATUS "  Protobuf compiler     : ${PROTOBUF_PROTOC_EXECUTABLE}")
   message(STATUS "  CXX flags             : ${CMAKE_CXX_FLAGS}")
   message(STATUS "  Build type            : ${CMAKE_BUILD_TYPE}")