commit fa4b8965d7e78a539cd20fa0f524ff4d930d3ada Author: Kevin Boyd Date: Fri May 8 13:33:05 2020 -0700 Initial commit diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..51e2a2f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "third_party/farmhash"] + path = third_party/farmhash + url = https://github.com/google/farmhash diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ed1b6f8 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,155 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required (VERSION 3.2) +project (cpu_check VERSION 20181130 LANGUAGES C CXX) + +# Options +# Use clang/llvm by default. +option(USE_CLANG "build with clang" ON) +# Build semi-statically by default. +option(BUILD_STATIC "build targets semi-statically linked" ON) + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif(NOT CMAKE_BUILD_TYPE) + + +# Begin Google local change + +# We use an in tree copy of boringSSL by default. +option(USE_BORINGSSL "build with boringSSL" OFF) + +option(IN_GOOGLE3 "building in google3" OFF) + +# The vendors subdirectories may not be present. 
+find_path( + VENDORS_AMD_PATH + NAMES amd.cc + PATHS ${CMAKE_CURRENT_SOURCE_DIR}/vendors/amd + NO_DEFAULT_PATH +) + +find_path( + VENDORS_INTEL_PATH + NAMES intel.cc + PATHS ${CMAKE_CURRENT_SOURCE_DIR}/vendors/intel + NO_DEFAULT_PATH +) +# End Google local change + +# Config header +configure_file ( + "${PROJECT_SOURCE_DIR}/config.h.in" + "${PROJECT_BINARY_DIR}/config.h" +) +include_directories("${PROJECT_BINARY_DIR}") + +if (USE_CLANG) + set(CMAKE_C_COMPILER clang) + set(CMAKE_CXX_COMPILER clang++) + set(CC clang) + set(CXX clang++) +endif(USE_CLANG) + +set(CMAKE_C_FLAGS_DEBUG "-g -Wall -O0") +set(CMAKE_CXX_FLAGS_DEBUG "-g -Wall -O0") +set(CMAKE_C_FLAGS_RELEASE "-Wall -O2") +set(CMAKE_CXX_FLAGS_RELEASE "-Wall -O2") + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_C_EXTENSIONS OFF) # we want c11 not gnu11 +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) # we want c++17 not gnu++17 + +add_executable(cpu_check cpu_check.cc) +add_executable(crc32c_test crc32c_test.cc) + +add_library(farmhash third_party/farmhash/src/farmhash.cc) +add_library(crc32c crc32c.c) +add_library(fvt_controller fvt_controller.cc) +add_library(utils utils.cc) + +# Begin Google local change +if (VENDORS_AMD_PATH) + add_library(amd ${VENDORS_AMD_PATH}/amd.cc) + add_library(hsmp ${VENDORS_AMD_PATH}/hsmp.cc) + target_link_libraries(amd hsmp pci) + set(VENDORS_LIBS ${VENDORS_LIBS} amd) +endif(VENDORS_AMD_PATH) + +if (VENDORS_INTEL_PATH) + add_library(intel ${VENDORS_INTEL_PATH}/intel.cc) + set(VENDORS_LIBS ${VENDORS_LIBS} intel) +endif(VENDORS_INTEL_PATH) +# End Google local change + +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-march=sandybridge" ARCH_SANDYBRIDGE) +if(ARCH_SANDYBRIDGE) + target_compile_options(farmhash PUBLIC -march=sandybridge) + target_compile_options(crc32c PUBLIC -march=sandybridge) +endif(ARCH_SANDYBRIDGE) + +target_link_libraries(cpu_check crc32c farmhash) +# Begin Google local change 
+target_link_libraries(cpu_check fvt_controller ${VENDORS_LIBS} utils) +# End Google local change +target_link_libraries(crc32c_test crc32c) + +if (BUILD_STATIC) + set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") +endif(BUILD_STATIC) + +# Needs pthreads +find_package(Threads REQUIRED) +target_link_libraries(cpu_check Threads::Threads) + +# Needs zlib +find_package (ZLIB REQUIRED) +if(ZLIB_INCLUDE_DIRS) + include_directories(${ZLIB_INCLUDE_DIRS}) +endif(ZLIB_INCLUDE_DIRS) +if(ZLIB_LIBRARIES) + target_link_libraries(cpu_check ${ZLIB_LIBRARIES}) +endif(ZLIB_LIBRARIES) + +# Begin Google local change +if(USE_BORINGSSL) + set(BORINGSSL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/boringssl) + add_subdirectory(${BORINGSSL_PATH}) + set(BORINGSSL_INCLUDE_DIRS ${BORINGSSL_PATH}/include) + include_directories("${BORINGSSL_PATH}/include") + target_link_libraries(cpu_check ssl crypto) +else(USE_BORINGSSL) +# End Google local change + +# Needs OpenSSL +find_package (OpenSSL REQUIRED) +include_directories(${OPENSSL_INCLUDE_DIRS}) +target_link_libraries(cpu_check ${OPENSSL_LIBRARIES}) + +# Static linking of OpenSSL may require -ldl, link it if found. +find_library (dl dl) +if(dl) + target_link_libraries(cpu_check dl) +endif(dl) + +# Begin Google local change +endif(USE_BORINGSSL) +# End Google local change + +install (TARGETS cpu_check DESTINATION bin) diff --git a/CONTRIBUTING b/CONTRIBUTING new file mode 100644 index 0000000..57d145a --- /dev/null +++ b/CONTRIBUTING @@ -0,0 +1,22 @@ + How to Contribute + +Contributions are encouraged! Please read below for requirements. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to <https://cla.developers.google.com/> to see +your current agreements on file or to sign a new one.
+ +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6740d9a --- /dev/null +++ b/LICENSE @@ -0,0 +1,217 @@ +Copyright 2018 Paul Ripke + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +------------------------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..6da99c3 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +# cpu_check + +`NOTE:` BETA code, work-in-progress. + +CPU torture test designed for SMP systems, attempting to find CPU hardware faults, focusing primarily on the x86_64 architecture. + +The basic operation is to: +* Run threads with affinity fixed to each logical CPU. +* Generate a chunk of random data, either dictionary based text, or random binary data. +* Run a number of checksum/hash algorithms over the data, store their results. +* Compress the data via one of (zlib, ...). +* Encrypt the data via AES-256-GCM. +* Copy the data ('rep movsb' on x86, else memcpy). +* Switch affinity to an alternate logical CPU. +* Decrypt. +* Decompress. +* Run checksum/hash algorithms and compare with stored results. + +Algorithms are chosen to exercise various hardware extensions. Eg. on x86_64, SSE4.2, AVX, etc. + +## Prerequisites: + +Designed to run under Unix/Linux OS. + +* cmake: https://cmake.org/ +* zlib +* OpenSSL/BoringSSL + +## Building + +``` +sh$ git clone git@github.com:stixpjr/cpu_check.git +sh$ cd cpu_check +sh$ mkdir build +sh$ cd build +sh$ cmake .. 
+sh$ make +``` + +## Options + +Some options have been implemented that affect the build, which may be passed +to cmake via, eg: + +```cmake -DCMAKE_BUILD_TYPE=(Debug|Release)``` + +* CMAKE_BUILD_TYPE=(Release|Debug) +* USE_CLANG=(ON|OFF) +* BUILD_STATIC=(ON|OFF) + +## TODO: + +* Use git submodules for: + * farmhash: https://github.com/google/farmhash + * highwayhash: https://github.com/google/highwayhash + * crc32c: https://github.com/google/crc32c + * cityhash: https://github.com/google/cityhash + * brotli: https://github.com/google/brotli + * gipfeli: https://github.com/google/gipfeli +* Expand encryption coverage - find those algorithms that stress the HW. +* Flags to enable/disable steps, eg. encryption. +* Flags controlling min/max buffer size. +* Use cpuid to dynamically select appropriate instruction set extensions. +* Query ACPI/cpuid for more meaningful CPU identification. +* Extra x86_64 instruction coverage: + * movnti (SSE2 mov doubleword with non-temporal hint) + * prefetch* + * movbe (mov with byte swap) +* Consider floating point tests? +* Keep stats on corruptions (eg. buffer lengths/alignments, detection means (crc32), etc). +* Try to narrow down corruptions automatically. diff --git a/config.h.in b/config.h.in new file mode 100644 index 0000000..a6641e8 --- /dev/null +++ b/config.h.in @@ -0,0 +1,8 @@ +#define cpu_check_VERSION "@cpu_check_VERSION@" + +// Begin Google local change +#cmakedefine IN_GOOGLE3 +#cmakedefine USE_BORINGSSL +#cmakedefine VENDORS_AMD_PATH +#cmakedefine VENDORS_INTEL_PATH +// End Google local change diff --git a/corrupt_cores.cc b/corrupt_cores.cc new file mode 100644 index 0000000..811e08f --- /dev/null +++ b/corrupt_cores.cc @@ -0,0 +1,200 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Analyzes cpu_check failure tids to produce list of condemned +// cores. Usually there's just one defective core. +// +// One way to extract tids from logs is extract_tids.sh. +// Pipe its output to this program. +// +// By default, this code assumes 28 core dual socket machines. + +#include +#include +#include +#include +#include + +#include "log.h" + +class BadCore { + public: + BadCore(int sockets, int cores_per_socket) + : sockets_(sockets), cores_per_socket_(cores_per_socket) {} + + // Condemns thread 'tid'. + void Condemn(int tid) { + std::vector c({TidToCanonicalCore(tid)}); + accused_.push_back(c); + } + + // Condemns one of 'tid_1' and 'tid_2'. + void Accuse(int tid_1, int tid_2) { + std::vector c({TidToCanonicalCore(tid_1), TidToCanonicalCore(tid_2)}); + accused_.push_back(c); + } + + // Greedy condemnation. + void Condemn() { + while (!accused_.empty()) { + CondemnWorst(); + } + } + + // Returns string naming the condemned cores. + std::string Condemnations() const { + if (condemned_.empty()) { + return "None"; + } + std::stringstream s; + if (ambiguous_) { + s << "AMBIGUOUS "; + } + for (auto &c : condemned_) { + s << CanonicalCoreToString(c.first) << " (" << c.second << ") "; + } + return s.str(); + } + + // Returns true if tid within legitimate range. + bool Plausible(int tid) const { + return (tid >= 0) && (tid < (2 * sockets_ * cores_per_socket_)); + } + + private: + // Condemns worst offender. 
+ void CondemnWorst() { + int worst = -1; + int worst_k = -1; + bool ambiguous = false; + for (int c = 0; c < sockets_ * cores_per_socket_; c++) { + const int k = AccusationCount(c); + if (k == 0) continue; + if (k > worst_k) { + worst = c; + worst_k = k; + ambiguous = false; + } else { + if (k == worst_k) { + ambiguous = true; + } + } + } + ambiguous_ |= ambiguous; + condemned_.push_back({worst, worst_k}); + Dispose(worst); + } + + // Returns number of accusations against 'canonical_core'. + int AccusationCount(int canonical_core) const { + int k = 0; + for (auto &v : accused_) { + if (std::find(v.begin(), v.end(), canonical_core) != v.end()) { + k++; + } + } + return k; + } + + // Delete accusations that include 'canonical_core'. + void Dispose(int canonical_core) { + std::vector> temp; + for (auto &v : accused_) { + if (std::find(v.begin(), v.end(), canonical_core) == v.end()) { + temp.push_back(v); + } + } + accused_ = temp; + } + + int TidToCanonicalCore(int tid) const { + return tid % (sockets_ * cores_per_socket_); + } + + std::string CanonicalCoreToString(int canonical_core) const { + const int socket = canonical_core / cores_per_socket_; + const int a = canonical_core; + const int b = canonical_core + sockets_ * cores_per_socket_; + std::stringstream s; + s << "CPU" << socket << " HT" << a << "-" << b; + return s.str(); + } + + const int sockets_; + const int cores_per_socket_; + std::vector> accused_; + std::vector> condemned_; + bool ambiguous_ = false; +}; + +static void UsageIf(bool v) { + if (!v) return; + LOG(ERROR) << "Usage corrupt_cores [-c cores_per_socket] [-s sockets]"; + exit(2); +} + +int main(int argc, char **argv) { + int sockets = 2; // Default: dual socket + int cores_per_socket = 28; // Default: C28 + for (int i = 1; i < argc; i++) { + const char *flag = argv[i]; + UsageIf(flag[0] != '-'); + for (flag++; *flag != 0; flag++) { + switch (*flag) { + case 'c': { + std::string c(++flag); + flag += c.length(); + std::stringstream s(c); + 
UsageIf((s >> cores_per_socket).fail()); + break; + } + case 's': { + std::string c(++flag); + flag += c.length(); + std::stringstream s(c); + UsageIf((s >> sockets).fail()); + break; + } + default: + UsageIf(true); + } + if (*flag == 0) break; + } + } + + std::string line; + BadCore bad(sockets, cores_per_socket); + + while (std::getline(std::cin, line)) { + std::istringstream ss(line); + int a = 9999; + if ((ss >> a).fail() || !bad.Plausible(a)) { + LOG(ERROR) << "Bad input: '" << line << "'"; + continue; + } + while (ss.peek() == ' ') ss.ignore(); + if (ss.eof()) { + bad.Condemn(a); + } else { + int b = 9999; + if ((ss >> b).fail() || !bad.Plausible(b)) { + LOG(ERROR) << "Bad input: '" << line << "'"; + continue; + } + bad.Accuse(a, b); + } + } + bad.Condemn(); + printf("Condemned %s\n", bad.Condemnations().c_str()); +} diff --git a/cpu_check.cc b/cpu_check.cc new file mode 100644 index 0000000..32c46e5 --- /dev/null +++ b/cpu_check.cc @@ -0,0 +1,2075 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#undef NDEBUG +#include "config.h" + +#include +#include +#include + +#include +#include +#if defined(__x86_64__) || defined(__i386__) +#include +#endif +#include +#include +#include +#include +#include +#include +#ifdef IN_GOOGLE3 +#include "third_party/zlib/zlib.h" +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef IN_GOOGLE3 +#include "third_party/openssl/crypto.h" +#include "third_party/openssl/evp.h" +#else +#include +#include +#endif + +#ifdef IN_GOOGLE3 +#include "third_party/absl/debugging/failure_signal_handler.h" +#include "third_party/absl/debugging/symbolize.h" +#endif + +#include "crc32c.h" +#include "third_party/farmhash/src/farmhash.h" +#include "fvt_controller.h" +#include "log.h" +#include "utils.h" + +#if defined(__i386__) || defined(__x86_64__) +#define X86_TARGET_ATTRIBUTE(s) __attribute__ ((target (s))) +#else +#define X86_TARGET_ATTRIBUTE(s) +#endif + +#undef HAS_FEATURE_MEMORY_SANITIZER +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define HAS_FEATURE_MEMORY_SANITIZER +# endif +#endif + +// Helper to make MSAN happy. NOP if memory sanitizer is not enabled. 
+void InitializeMemoryForSanitizer(char* addr, size_t size) { +#ifdef HAS_FEATURE_MEMORY_SANITIZER + std::default_random_engine rnd; + std::uniform_int_distribution dist(std::numeric_limits::min(), + std::numeric_limits::max()); + for (size_t i = 0; i < size; i++) { + addr[i] = dist(rnd); + } +#endif +} + +inline void __movsb(char *dst, const char *src, size_t size) { +#if defined(__i386__) || defined(__x86_64__) + __asm__ __volatile__("rep movsb" + : "+D"(dst), "+S"(src), "+c"(size) + : + : "memory"); +#else + LOG(FATAL) << "Cannot rep;movsb"; +#endif +} + +inline void __stosb(void *dst, unsigned char c, size_t size) { +#if defined(__i386__) || defined(__x86_64__) + __asm__ __volatile__("rep stosb" + : "+D"(dst), "+c"(size) + : "a"(c) + : "memory"); +#else + LOG(FATAL) << "Cannot rep;stosb"; +#endif +} + +inline void __sse_128_memcpy(char *dst, const char *src, size_t size) { +#if (defined(__i386__) || defined(__x86_64__)) + size_t blks = size / 16; + for (int i = 0; i < blks; i++) { + _mm_storeu_si128( + reinterpret_cast<__m128i *>(dst) + i, + _mm_loadu_si128(reinterpret_cast(src) + i)); + } + memcpy(dst + blks * 16, src + blks * 16, size - blks * 16); +#else + LOG(FATAL) << "SSE not available"; +#endif +} + +X86_TARGET_ATTRIBUTE("avx") +inline void __avx_256_memcpy(char *dst, const char *src, size_t size) { +#if (defined(__i386__) || defined(__x86_64__)) + size_t blks = size / 32; + for (int i = 0; i < blks; i++) { + _mm256_storeu_si256( + reinterpret_cast<__m256i *>(dst) + i, + _mm256_loadu_si256(reinterpret_cast(src) + i)); + } + memcpy(dst + blks * 32, src + blks * 32, size - blks * 32); +#else + LOG(FATAL) << "x86 only"; +#endif +} + +X86_TARGET_ATTRIBUTE("avx512f") +inline void __avx_512_memcpy(char *dst, const char *src, size_t size) { +#if (defined(__i386__) || defined(__x86_64__)) + size_t blks = size / 64; + for (int i = 0; i < blks; i++) { + _mm512_storeu_si512( + reinterpret_cast<__m512i *>(dst) + i, + _mm512_loadu_si512(reinterpret_cast(src) + i)); + 
} + memcpy(dst + blks * 64, src + blks * 64, size - blks * 64); +#else + LOG(FATAL) << "x86 only"; +#endif +} + +static const double t0 = TimeInSeconds(); + +static void SleepForMillis(int64_t millis) { + // LOG(INFO) << "sleeping " << millis; + int64_t micros = 1000 * millis; + while (micros > 0) { + int mm = std::min(1000000, micros); + int rc = usleep(mm); + if (rc) { + LOG(ERROR) << "cant sleep"; + } + micros -= mm; + } +} + +static long pagesize = sysconf(_SC_PAGESIZE); +static long cache_line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + +std::string HexData(const char *s, uint32_t l) { + const char d[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + std::string o; + o.resize(l << 1); + for (uint32_t i = 0; i < l; i++) { + uint8_t b = s[i]; + o[(i << 1)] = d[(b >> 4) & 0xf]; + o[(i << 1) + 1] = d[b & 0xf]; + } + return o; +} + +std::string HexStr(const std::string &s) { return HexData(s.data(), s.size()); } + +std::atomic_bool exiting(false); +std::atomic_uintmax_t errorCount(0); +std::atomic_uintmax_t successCount(0); +static constexpr uintmax_t kErrorLimit = 2000; + +#if defined(__i386__) || defined(__x86_64__) +const bool is_x86 = true; +#else +const bool is_x86 = false; +#endif + +// So-called Logistic Map with parameter 4.0. +// Floating point approximation aside, if 0 < v < 1 then 0 < ChaoticF1(v) < 1. +static inline double ChaoticF1(double v) { + return 4.0 * v * (1.0 - v); +} + +#if defined(__i386__) || defined(__x86_64__) +static bool can_do_avx() { + __builtin_cpu_init(); + return __builtin_cpu_supports("avx"); +} + +static bool can_do_avx512f() { + __builtin_cpu_init(); + return __builtin_cpu_supports("avx512f"); +} + +static bool can_do_fma() { + __builtin_cpu_init(); + return __builtin_cpu_supports("fma"); +} + +static bool can_do_fvt() { + return geteuid() == 0; // need write access to MSRs. 
+} + +#else +static bool can_do_avx() { return false; } +static bool can_do_avx512f() { return false; } +static bool can_do_fma() { return false; } +static bool can_do_fvt() { return false; } // x86-only for now. +#endif + +bool do_madvise = true; +bool do_repmovsb = is_x86; +bool do_sse_128_memcpy = is_x86; +bool do_avx_256_memcpy = can_do_avx(); +bool do_avx_512_memcpy = can_do_avx512f(); +bool do_avx_heavy = can_do_avx(); +bool do_compress = true; +bool do_encrypt = true; +bool do_hashes = true; +bool do_misalign = true; +bool do_hop = true; +bool do_ssl_self_check = true; +bool do_flush = false; // Default: disabled for now +bool do_provenance = false; +bool do_repstosb = is_x86; +bool do_freq_sweep = false; +bool do_freq_hi_lo = false; +bool do_noise = false; +bool do_invert_cores = false; +int fixed_min_frequency = 0; +int fixed_max_frequency = 0; +bool do_fvt = can_do_fvt(); +bool do_fast_string_ops = true; +int seconds_per_freq = 300; +uintmax_t error_limit = kErrorLimit; + +bool SetAffinity(int id) { + int err = 0; +#ifdef __linux__ + cpu_set_t cset; + CPU_ZERO(&cset); + CPU_SET(id, &cset); + err = sched_setaffinity(0, sizeof(cset), &cset); + std::atomic_thread_fence(std::memory_order_seq_cst); + if (err) { + err = errno; + } +#elif defined(__NetBSD__) + cpuset_t *cset; + cset = cpuset_create(); + if (cset == nullptr) { + LOG(ERROR) << "cpuset_create failed: " << strerror(errno); + return false; + } + cpuset_set(id, cset); + err = pthread_setaffinity_np(pthread_self(), cpuset_size(cset), cset); + std::atomic_thread_fence(std::memory_order_seq_cst); + cpuset_destroy(cset); +#endif + if (err != 0) { + LOG_EVERY_N_SECS(WARN, 30) + << "setaffinity to tid: " << id << " failed: " << strerror(err); + } + return err == 0; +} + +std::vector ReadDict() { + // Dictionary search paths + static const char *dicts[] = { + "/usr/share/dict/words", + "words", + }; + std::vector words; + std::ifstream f; + + for (const auto &d : dicts) { + f.open(d, std::ifstream::in); + if 
(f.is_open()) break; + f.clear(); + } + + if (!f.is_open()) return words; + + LOG(DEBUG) << "Reading words."; + + std::string word; + while (!f.eof()) { + std::getline(f, word); + words.push_back(word); + } + f.close(); + LOG(DEBUG) << "Read " << words.size() << " words."; + std::sort(words.begin(), words.end(), + [](const std::string &a, const std::string &b) { + return a.size() < b.size(); + }); + return words; +} + +class MalignBuffer { + public: + struct PunchedHole { + std::string ToString() const; + size_t start = 0; + size_t length = 0; + unsigned char v = 0x53; + }; + + enum CopyMethod { + kMemcpy, + kRepMov, + kSseBy128, + kAvxBy256, + kAvxBy512, + }; + + static std::string ToString(CopyMethod m) { + switch (m) { + case kMemcpy: + return "memcpy"; + case kRepMov: + return "rep;mov"; + case kSseBy128: + return "sse:128"; + case kAvxBy256: + return "avx:256"; + case kAvxBy512: + return "avx:512"; + } + } + + // Provides buffer with specified alignment. + MalignBuffer(size_t capacity) + : capacity_(capacity), + base_address_(aligned_alloc(pagesize, capacity + pagesize)) { + assert(base_address_); + // There are lots of places that use unitialized MalignBuffer. So just + // fill some pseudo-random bytes if cpu_check is compiled with msan. + InitializeMemoryForSanitizer(static_cast(base_address_), capacity_); + } + ~MalignBuffer() { free(base_address_); } + + // REQUIRES: alignment_offset + length <= capacity_. + void Initialize(size_t alignment_offset, size_t length); + + const char* data() const { return buffer_address_; } + char* data() { return buffer_address_; } + size_t size() const { return length_; } + + // REQUIRES alignment_offset + length <= capacity_. + void resize(size_t length); + + // Compares 'this' to 'that' returning empty string if identical. + // If not identical, returns a syndrome, currently Hamming distance, + // corrupted subrange bounds, and the diffs. 
+ std::string Syndrome(const MalignBuffer& that) const; + + // Validated data copy from source to 'this'. + // 'this' must be appropriately sized. + // Returns syndrome upon copy failure. + std::string CopyFrom(const MalignBuffer& that, CopyMethod m); + + // Unvalidated copy to 'this'. + void CopyFrom(const char* src, size_t length, CopyMethod m); + void CopyFrom(size_t pos, const char* src, size_t length, CopyMethod m); + + // Randomly flushes cache lines. + void RandomFlush(std::knuth_b* rng) const; + + // Conventional or rep;sto memset operation, according to 'use_rep_stos'. + void Memset(size_t offset, unsigned char v, size_t length, bool use_rep_stos); + + // Memsets buffer to 'hole.v', using rep;stos operation if + // 'use_rep_stos' set; + void PunchHole(const PunchedHole& hole, bool use_rep_stos); + + private: + std::string CorruptionSyndrome(const MalignBuffer& that) const; + std::string CrackId(uint64_t) const; + + const size_t capacity_; + void* base_address_ = nullptr; + + size_t alignment_offset_ = 0; + size_t length_ = 0; // Usable length + char* buffer_address_ = nullptr; +}; + +void MalignBuffer::Initialize(size_t alignment_offset, size_t length) { + assert(alignment_offset + length <= capacity_); + alignment_offset_ = alignment_offset; + length_ = length; + buffer_address_ = static_cast(base_address_) + alignment_offset_; +} + +void MalignBuffer::resize(size_t length) { + Initialize(alignment_offset_, length); +} + +std::string MalignBuffer::CopyFrom(const MalignBuffer& that, CopyMethod m) { + CopyFrom(that.data(), that.size(), m); + return Syndrome(that); +} + +void MalignBuffer::CopyFrom(const char* src, size_t length, CopyMethod m) { + assert(size() == length); + CopyFrom(0, src, length, m); +} + +void MalignBuffer::CopyFrom(size_t pos, const char* src, size_t length, + CopyMethod m) { +assert(pos + length <= size()); +switch (m) { + case kMemcpy: + // Assumes memcpy doesn't use rep;movsb; false in lots of environments. 
+ memcpy(data() + pos, src, length); + break; + case kRepMov: + __movsb(data() + pos, src, length); + break; + case kSseBy128: + __sse_128_memcpy(data() + pos, src, length); + break; + case kAvxBy256: + __avx_256_memcpy(data() + pos, src, length); + break; + case kAvxBy512: + __avx_512_memcpy(data() + pos, src, length); + break; +} +} + +std::string MalignBuffer::Syndrome(const MalignBuffer& that) const { + std::stringstream s; + std::string syndrome = CorruptionSyndrome(that); + if (syndrome.empty()) return ""; + s << syndrome << ", \"this\": \"" << static_cast(data()) + << "\", " + << "\"that\": \"" << static_cast(that.data()) << "\""; + return s.str(); +} + +std::string MalignBuffer::CorruptionSyndrome(const MalignBuffer& that) const { + std::stringstream s; + if (size() != that.size()) { + s << Json("unequalSizeThis", size()) << ", " + << Json("unequalSizeThat", that.size()); + return s.str(); + } + bool failed_memcmp = memcmp(data(), that.data(), that.size()); + + int wrong_bytes = 0; + int wrong_bits = 0; + int byte_faults = 0; + int first_wrong = INT_MAX; + int last_wrong = INT_MIN; + std::vector lane_errors(8, 0); + for (size_t i = 0; i < size(); i++) { + unsigned char a = *(data() + i); + unsigned char b = *(that.data() + i); + unsigned char d = a ^ b; + if (d) { + first_wrong = std::min(first_wrong, i); + last_wrong = std::max(last_wrong, i); + byte_faults |= d; + wrong_bytes++; + wrong_bits += __builtin_popcount(d); + for (size_t i = 0; i < 8; i++) { + if ((d >> i) & 1) { + lane_errors[i]++; + } + } + } + } + if (wrong_bits || wrong_bytes) { + const int range_width = (last_wrong - first_wrong) + 1; + s << Json("cmpResult", + (failed_memcmp ? 
"Failed_Memcmp" : "**Passed_Memcmp**")) + << ", " << Json("wrongByteCount", wrong_bytes) << ", " + << Json("wrongBitCount", wrong_bits) << ", " + << Json("corruptionWidth", range_width) << ", " + << Json("corruptStart", first_wrong) << ", " + << Json("corruptByteBitMask", byte_faults) << ", " + << "\"byBitLane\": ["; + for (size_t i = 0; i < 8; i++) { + if (i) s << ", "; + s << lane_errors[i]; + } + s << " ] "; + // Dump up to 64 corrupted locations. + std::stringstream dump; + dump << " \"byteErrors\": [ " << std::hex; + uint64_t buf_a = 0; + uint64_t buf_b = 0; + for (size_t k = 0; k < std::min(64, range_width); k++) { + uint8_t a = *(data() + first_wrong + k); + uint8_t b = *(that.data() + first_wrong + k); + if (k) dump << ", "; + dump << "[ " << std::setw(2) << "\"0x" << static_cast(a) << "\", " + << std::setw(2) << "\"0x" << static_cast(b) << "\" "; + buf_a = (buf_a >> 8) | static_cast(a) << 56; + buf_b = (buf_b >> 8) | static_cast(b) << 56; + if ((k >= 7) && (7 == ((first_wrong + k) % 8))) { + dump << ", " << CrackId(buf_a) << ", " << CrackId(buf_b); + buf_a = 0; + buf_b = 0; + } + dump << " ]"; + } + dump << " ] "; + return s.str() + ", " + dump.str(); + } else { + if (!failed_memcmp) return ""; + return Json("cmpResult", "**Failed_Memcmp**"); + } +} + +std::string MalignBuffer::CrackId(uint64_t v) const { + std::stringstream s; + s << std::hex << " [\"0x" << std::setw(4) << (v >> 48) << "\", \"0x" + << std::setw(6) << ((v >> 24) & 0xffffff) << "\", \"0x" << std::setw(6) + << (v & 0xffffff) << "\"]"; + return s.str(); +} + +void MalignBuffer::RandomFlush(std::knuth_b* rng) const { +#if defined(__i386__) || defined(__x86_64__) + // Note: no barriers used. 
+ const char* p = buffer_address_ + alignment_offset_; + while (p < buffer_address_ + length_) { + if (std::uniform_int_distribution(0, 1)(*rng)) { + __builtin_ia32_clflush(p); + } + p += cache_line_size; + } +#endif +} + +std::string MalignBuffer::PunchedHole::ToString() const { + if (length) { + return JsonRecord("hole", + Json("start", start) + ", " + Json("length", length) + + ", " + Json("v", static_cast(v))); + } else { + return JsonNull("hole"); + } +} + +void MalignBuffer::Memset(size_t offset, unsigned char v, size_t length, + bool use_rep_stos) { + if (use_rep_stos) { + __stosb(data() + offset, v, length); + } else { + memset(data() + offset, v, length); + } +} + +void MalignBuffer::PunchHole(const PunchedHole& hole, bool use_rep_stos) { + if (hole.length) { + Memset(hole.start, hole.v, hole.length, use_rep_stos); + } +} + +// x86 AVX usage has complicated core power effects. This code tries +// to provoke some power transitions that don't otherwise happen. +// While it's at it, it lightly checks results, but that's not the central +// goal. ToDo: maybe toughen the correctness checking. +// +// The power policies are governed by a number of opaque parameters; this code +// is based on a lot of guesses. + +class Avx { + public: + Avx() {} + + // Activate AVX depending on throw of the dice. + // Returns syndrome if computational error detected. + std::string MaybeGoHot(); + + // Does a bit of computing if in a "hot" mode. + // Returns syndrome if computational error detected. + std::string BurnIfAvxHeavy(); + + private: + constexpr static int kIterations = 5000; + std::string Avx256(int rounds); + std::string Avx256FMA(int rounds); + std::string Avx512(int rounds); + int level_ = 0; + std::knuth_b rng_; +}; + +std::string Avx::MaybeGoHot() { + if (std::uniform_int_distribution(0, 1)(rng_)) { + // Don't provoke. + level_ = 0; + return ""; + } + if (can_do_avx512f()) { + // Processor supports both AVX and AVX512. 
+ level_ = std::uniform_int_distribution(0, 1)(rng_) ? 3 : 5; + } else { + // Processor supports only AVX. + level_ = 3; + } + return BurnIfAvxHeavy(); +} + +std::string Avx::BurnIfAvxHeavy() { + if (level_ == 3) { + return can_do_fma() ? Avx256FMA(kIterations) : Avx256(kIterations); + } + if (level_ == 5) { + return Avx512(kIterations); + } + return ""; +} + +// See notes for Avx512 below +X86_TARGET_ATTRIBUTE("avx") +std::string Avx::Avx256(int rounds) { +#if (defined(__i386__) || defined(__x86_64__)) + const __m256d minus_four = _mm256_set1_pd(-4.0); + __m256d x[4]; + for (int k = 0; k < 4; k++) { + x[k] = + _mm256_set1_pd(std::uniform_real_distribution(0.0, 1.0)(rng_)); + } + double *gross_x[4] = { + reinterpret_cast(&x[0]), + reinterpret_cast(&x[1]), + reinterpret_cast(&x[2]), + reinterpret_cast(&x[3]), + }; + for (int i = 0; i < rounds; i++) { + __m256d a[4]; + a[0] = _mm256_sub_pd(_mm256_mul_pd(x[0], x[0]), x[0]); + a[1] = _mm256_sub_pd(_mm256_mul_pd(x[1], x[1]), x[1]); + a[2] = _mm256_sub_pd(_mm256_mul_pd(x[2], x[2]), x[2]); + a[3] = _mm256_sub_pd(_mm256_mul_pd(x[3], x[3]), x[3]); + x[0] = _mm256_mul_pd(minus_four, a[0]); + x[1] = _mm256_mul_pd(minus_four, a[1]); + x[2] = _mm256_mul_pd(minus_four, a[2]); + x[3] = _mm256_mul_pd(minus_four, a[3]); + } + for (int k = 1; k < 4; k++) { + for (int i = 0; i < 4; i++) { + if (gross_x[k][i] != gross_x[k][0]) { + return "avx256 pd"; + } + } + } +#endif + return ""; +} + +// See notes for Avx512 below +X86_TARGET_ATTRIBUTE("avx,fma") +std::string Avx::Avx256FMA(int rounds) { +#if (defined(__i386__) || defined(__x86_64__)) + const __m256d minus_four = _mm256_set1_pd(-4.0); + __m256d x[4]; + for (int k = 0; k < 4; k++) { + x[k] = + _mm256_set1_pd(std::uniform_real_distribution(0.0, 1.0)(rng_)); + } + double *gross_x[4] = { + reinterpret_cast(&x[0]), + reinterpret_cast(&x[1]), + reinterpret_cast(&x[2]), + reinterpret_cast(&x[3]), + }; + for (int i = 0; i < rounds; i++) { + __m256d a[4]; + a[0] = _mm256_fmsub_pd(x[0], 
x[0], x[0]); + a[1] = _mm256_fmsub_pd(x[1], x[1], x[1]); + a[2] = _mm256_fmsub_pd(x[2], x[2], x[2]); + a[3] = _mm256_fmsub_pd(x[3], x[3], x[3]); + x[0] = _mm256_mul_pd(minus_four, a[0]); + x[1] = _mm256_mul_pd(minus_four, a[1]); + x[2] = _mm256_mul_pd(minus_four, a[2]); + x[3] = _mm256_mul_pd(minus_four, a[3]); + } + for (int k = 1; k < 4; k++) { + for (int i = 0; i < 4; i++) { + if (gross_x[k][i] != gross_x[k][0]) { + return "avx256 pd"; + } + } + } +#endif + return ""; +} + +// Interleave AVX512 parallel calculation of iterates of f(x) = 4x(1-x). +// Hope compiler too dumb to see through this. +X86_TARGET_ATTRIBUTE("avx512f") +std::string Avx::Avx512(int rounds) { +#if (defined(__i386__) || defined(__x86_64__)) + const __m512d minus_four = _mm512_set1_pd(-4.0); + __m512d x[4]; + for (int k = 0; k < 4; k++) { + x[k] = + _mm512_set1_pd(std::uniform_real_distribution(0.0, 1.0)(rng_)); + } + + double *gross_x[4] = { + reinterpret_cast(&x[0]), + reinterpret_cast(&x[1]), + reinterpret_cast(&x[2]), + reinterpret_cast(&x[3]), + }; + + for (int i = 0; i < rounds; i++) { + __m512d a[4]; + a[0] = _mm512_fmsub_pd(x[0], x[0], x[0]); + a[1] = _mm512_fmsub_pd(x[1], x[1], x[1]); + a[2] = _mm512_fmsub_pd(x[2], x[2], x[2]); + a[3] = _mm512_fmsub_pd(x[3], x[3], x[3]); + x[0] = _mm512_mul_pd(minus_four, a[0]); + x[1] = _mm512_mul_pd(minus_four, a[1]); + x[2] = _mm512_mul_pd(minus_four, a[2]); + x[3] = _mm512_mul_pd(minus_four, a[3]); + } + + for (int k = 1; k < 4; k++) { + for (int i = 0; i < 7; i++) { + if (gross_x[k][i] != gross_x[k][0]) { + return "avx512 pd"; + } + } + } +#endif + return ""; +} + +// Produces noise of all kinds by running intermittently. +// There's a coarse cycle with four fine phases: +// Phase 0: Off +// Phase 1: High-speed on-off +// Phase 2: On +// Phase 3: High-speed on-off +class NoiseScheduler { + public: + // The following constants are just plain made up outta whole cloth. 
+ // You could consider various power regulation time constants and thermal + // intertia and so forth. Or just make it up. + static constexpr int kCoarseMillis = 5000; // Coarse period in millis + static constexpr int kFineMillis = 50; // Fine period in millis + + // Blocks until next scheduled activity + static void BlockUntilOn() { + bool was_blocked = false; + while (true) { + int64_t t = 1e3 * TimeInSeconds(); + int64_t coarse_block = t / kCoarseMillis; + int64_t phase = coarse_block % 4; + if (phase == 2) { + if (was_blocked) { + // LOG(INFO) << "Coarse grained unblock"; + } + was_blocked = false; + return; // On + } + if (phase == 0) { + // Wait til next phase and then re-evaluate. + SleepForMillis(((coarse_block + 1) * kCoarseMillis) - t); + was_blocked = true; + continue; + } + // Fine phase. + int64_t fine_block = t / kFineMillis; + if (fine_block % 2) { + if (was_blocked) { + // LOG(INFO) << "Fine grained unblock"; + } + was_blocked = false; + return; // Fine-grained on + } + // Wait til next fine block and then re-evaluate. + SleepForMillis(((fine_block + 1) * kFineMillis) - t); + was_blocked = true; + } + } +}; + +// Rudimentary coherence/uncore tester. +// Randomly assigns each slot of a seemingly shared buffer to a single tid, +// creating only "false sharing". +// Thus each slot, regardless of alignment, must obey program order unless the +// machine is broken. +// To be toughened, e.g.: +// Widen the slots a bit +// Control the sharing more tightly, e.g. each cache line split between 2 tids +// Maybe checksum the indices to distinguish core-local compute errors from +// coherence errors, but that's perhaps easier said than done effectively. +// As it stands, it may be particularly hard to localize failures. Though that's +// always going to be a bit hard, which is the point. One technique might be +// to leave this alone and to run on subsets of cores and sockets. 
+class Silkscreen { + public: + static constexpr size_t kSize = 1000 * 1000; // Size of buffer + + Silkscreen(const std::vector &tid_list); + ~Silkscreen() { free(buffer_address_); } + + // Writes value derived from 'round' into all slots owned by 'tid'. + // Returns number of slots written. + uint64_t WriteMySlots(int tid, uint64_t round); + + // Returns JSON-formatted error string if slot 'k' belongs to 'tid' and has + // value not properly corresponding with 'round'. + // Returns "=" if slot 'k' belongs to 'tid' and has expected value. + // Otherwise, if slot 'k' does not belong to 'tid', returns empty string. + std::string CheckMySlot(int tid, uint64_t round, size_t k) const; + + private: + int owner(size_t k) const { return owner_[k]; } + size_t size() const { return owner_.size(); } + const char* data(size_t k) const { return buffer_address_ + k; } + char* data(size_t k) { return buffer_address_ + k; } + + std::vector owner_; // const after initialization + char* buffer_address_ = nullptr; +}; + +Silkscreen::Silkscreen(const std::vector &tid_list) + : buffer_address_(static_cast(aligned_alloc( + pagesize, + pagesize * ((kSize + pagesize - 1) / pagesize) ))) { + std::knuth_b rng; + std::uniform_int_distribution dist(0, tid_list.size() - 1); + for (size_t k = 0; k < kSize; k++) { + size_t w = dist(rng); + owner_.push_back(tid_list[w]); + } +} + +uint64_t Silkscreen::WriteMySlots(int tid, uint64_t round) { + uint64_t j = 0; + for (size_t k = 0; k < kSize; k++) { + if (owner(k) == tid) { + *data(k) = static_cast(round); + j++; + } + } + return j; +} + +std::string Silkscreen::CheckMySlot(int tid, uint64_t round, size_t k) const { + if (owner(k) != tid) return ""; + const int v = *data(k); + const int w = static_cast(round); + if (v == w) return "="; + return + Json("position", k) + ", " + Json("is", v) + ", " + Json("expected", w); +} + +class Worker { + public: + // Does not take ownership of 'silkscreen'. 
+ Worker(int pid, const std::vector *words, + std::vector tid_list, int tid, Silkscreen *silkscreen) + : pid_(pid), tid_(tid), words_(words), tid_list_(tid_list), + silkscreen_(silkscreen), rndeng_(std::random_device()()) { + } + ~Worker() {} + void Run(); + + private: + static constexpr size_t kBufMin = 12; +#ifdef HAVE_FEATURE_MEMORY_SANITIZER + // Use smaller buffers if cpu_check is built with msan. Otherwise + // we will time out in testing. + static constexpr size_t kBufMax = 1 << 16; // 64 KiB +#else + static constexpr size_t kBufMax = 1 << 20; // 1 MiB +#endif + + struct FloatingPointResults { + bool operator!=(const FloatingPointResults& other) const { + return d != other.d; + } + + double d = 0.0; + }; + + typedef struct { + const char *name; + FloatingPointResults (Worker::*func)( + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer*) const; + } generatorItem; + + struct BufferSet { + void Alloc(std::unique_ptr *p) { + const size_t kBufCap = kBufMax + 23 * pagesize; + if (!*p) { + p->reset(new MalignBuffer(kBufCap)); + } + } + + std::unique_ptr original; + std::unique_ptr compressed; + std::unique_ptr encrypted; + std::unique_ptr copied; + std::unique_ptr decrypted; + std::unique_ptr decompressed; + std::unique_ptr re_made; + }; + + FloatingPointResults FillBufferSystematic( + uint32_t unused_seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const; + FloatingPointResults FillBufferRandomData( + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const; + FloatingPointResults FillBufferRandomText( + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const; + FloatingPointResults FillBufferGrilledCheese( + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const; + + void MadviseDontNeed(const MalignBuffer &s) const; + size_t Alignment(); + void MaybeFlush(const MalignBuffer 
&s); + // Attempts to schedules CPU frequency using the Worker's. + // FVTController object. Returns the scheduled frequency or + // 0 if there is no FVTController available. + int ScheduledMHz() const; + MalignBuffer::CopyMethod CopyMethod(); + std::string FVT() const; + MalignBuffer::PunchedHole PunchedHole(size_t bufsize); + FloatingPointResults GenerateData( + const generatorItem& generator, + const MalignBuffer::PunchedHole& hole, + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const; + + // Emits a failure record. + // TODO: bump error count here, and maybe log, instead of at every + // call site. + std::string Jfail(const std::string &err, const std::string &v) { + if (errorCount > error_limit) { + exiting = true; + LOG(INFO) << "I am quitting after " << errorCount << " errors"; + } + return "{ " + JsonRecord("fail", Json("err", err) + ", " + v) + ", " + + JTag() + " }"; + } + + // Array of random data generators. + static const std::vector kGenerators; + + const uint64_t pid_; + const int tid_; + const std::vector *words_; + const std::vector tid_list_; + Silkscreen* const silkscreen_; + + // We don't really need "good" random numbers. + // std::mt19937_64 rndeng_; + std::knuth_b rndeng_; + uint64_t round_ = 0; + + std::unique_ptr fvt_controller_; +}; + +const std::vector Worker::kGenerators = { + {"SYSTEMATIC", &Worker::FillBufferSystematic}, + {"DATA", &Worker::FillBufferRandomData}, + {"TEXT", &Worker::FillBufferRandomText}, + {"CHEESE", &Worker::FillBufferGrilledCheese}, +}; + +std::string Worker::FVT() const { + if (fvt_controller_ == nullptr) return ""; + return fvt_controller_->FVT(); +} + +int Worker::ScheduledMHz() const { + if (fvt_controller_ == nullptr) { + return 0; + } + + if (fixed_min_frequency && (fixed_min_frequency == fixed_max_frequency)) { + // User-specified fixed frequency. 
+ return fixed_min_frequency; + } + if (!do_freq_sweep && !do_freq_hi_lo + && !fixed_min_frequency && !fixed_max_frequency) { + // Run at maximum frequency. + return fvt_controller_->limit_mHz(); + } + + const int low_f = + fixed_min_frequency ? fixed_min_frequency : fvt_controller_->kMinTurboMHz; + // hi_f cannot exceed limit + const int limit_mHz = fvt_controller_->limit_mHz(); + const int hi_f = fixed_max_frequency + ? std::min(fixed_max_frequency, limit_mHz) + : limit_mHz; + + int64_t t = TimeInSeconds() / seconds_per_freq; + if (do_freq_hi_lo) { + const int step = t % 2; + return step ? low_f : hi_f; + } else { + const int steps = 1 + (hi_f - low_f) / 100; + const int full_ramps = t / steps; + const bool upwards = full_ramps % 2; + const int step = t % steps; + return upwards ? low_f + 100 * step : hi_f - 100 * step; + } +} + +// Fills 'b' with a systematic pattern. +// Returns iterate of chaotic floating point function of 'seed', with some +// reciprocal torture. +Worker::FloatingPointResults Worker::FillBufferSystematic( + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const { + // Format: 2 bytes of PID, 3 bytes of round number, 3 bytes of offset. + // Note: Perhaps should be AC-modulated. Perhaps should be absolute aligned + // for easier recognition. + // Note: appropriate for LE machines only. 
+ FloatingPointResults fp; + fp.d = std::max(seed, 2); + for (size_t i = 0; i * 8 < b->size(); i++) { + const size_t p = 8 * i; + const size_t k = std::min(8, b->size() - p); + const uint64_t v = + ((pid_ & 0xffff) << 48) + | ((round_ & 0xffffff) << 24) | (i & 0xffffff); + for (size_t m = 0; m < k; m++) { + (b->data())[p + m] = *(reinterpret_cast(&v) + m); + } + fp.d = 1.0 / ChaoticF1(1.0 / fp.d); + } + fp.d = 1.0 / fp.d; + return fp; +} + +Worker::FloatingPointResults Worker::FillBufferRandomData( + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const { + std::knuth_b rng(seed); + std::uniform_int_distribution + dist(0, std::numeric_limits::max()); + size_t p = 0; + const size_t length = b->size(); + // Repeatedly append random number (one to eight) random bytes. + while (p < length) { + const size_t max_span = std::min(length - p, 8); + const size_t z = std::uniform_int_distribution(1, max_span)(rng); + const uint64_t v = dist(rng); + b->CopyFrom(p, reinterpret_cast(&v), z, copy_method); + p += z; + } + return FloatingPointResults(); +} + +Worker::FloatingPointResults Worker::FillBufferRandomText( + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, MalignBuffer* b) const { + std::knuth_b rng(seed); + std::exponential_distribution dist(20); + const size_t bufsize = b->size(); + size_t pos = 0; + while (pos < bufsize) { + const size_t r = std::min(static_cast(dist(rng) * words_->size()), + words_->size() - 1); + const auto &word = (*words_)[r]; + const size_t wordlen = word.size(); + if (pos + wordlen >= bufsize) { + break; + } + b->CopyFrom(pos, word.c_str(), wordlen, copy_method); + pos += wordlen; + if (pos < bufsize) { + b->Memset(pos, ' ', 1, use_repstos); + pos++; + } + } + // Pad with spaces + b->Memset(pos, ' ', bufsize - pos, use_repstos); + return FloatingPointResults(); +} + +// memset (conventional or rep;stos) randomly aligned, random width, randomly +// overlapped stretches of buffer. 
Constants aim to hit multiple times in cache +// lines and buffers. Untuned and based on nothing but hunches. +Worker::FloatingPointResults Worker::FillBufferGrilledCheese( + uint32_t seed, MalignBuffer::CopyMethod copy_method, bool use_repstos, + MalignBuffer* b) const { + std::knuth_b rng(seed); + const size_t kAdvance = 15; + const size_t kWindow = 64; + unsigned char flavor = 0; + b->Memset(0, 0, b->size(), use_repstos); + for (int base = kWindow; base < b->size(); base += kAdvance) { + if (std::uniform_int_distribution(0, 1)(rng)) continue; + flavor++; + const size_t start = + std::uniform_int_distribution(base - kWindow, base)(rng); + const size_t end = std::uniform_int_distribution(start, base)(rng); + b->Memset(start, flavor, 1 + end - start, use_repstos); + } + return FloatingPointResults(); +} + +Worker::FloatingPointResults Worker::GenerateData( + const generatorItem& generator, + const MalignBuffer::PunchedHole& hole, + uint32_t seed, MalignBuffer::CopyMethod copy_method, + bool use_repstos, + MalignBuffer* b) const { + const FloatingPointResults f = + (this->*generator.func)(seed, copy_method, use_repstos, b); + b->PunchHole(hole, use_repstos); + return f; +} + +// Hints to the OS to release the buffer's memory. +void Worker::MadviseDontNeed(const MalignBuffer &s) const { + // Round up the buffer start address to a page boundary. + intptr_t start = ((intptr_t) s.data() + pagesize - 1) & ~(pagesize - 1); + // Round down the buffer end address to a page boundary. + intptr_t end = ((intptr_t) (s.data() + s.size() - 1)) & ~(pagesize - 1); + if (end - start >= pagesize) { + if (madvise((char *)start, end - start, MADV_DONTNEED) == -1) { + LOG(WARN) << "tid " << tid_ + << " madvise(MADV_DONTNEED) failed: " << strerror(errno); + } + } +} + +void Worker::MaybeFlush(const MalignBuffer& s) { + // Half the time, tell the OS to release the destination buffer. 
+ if (do_flush && std::uniform_int_distribution(0, 1)(rndeng_)) { + s.RandomFlush(&rndeng_); + } +} + +size_t Worker::Alignment() { + return do_misalign ? + std::uniform_int_distribution(0, pagesize)(rndeng_) : 0; +} + +MalignBuffer::CopyMethod Worker::CopyMethod() { + std::vector v; + v.push_back(MalignBuffer::kMemcpy); + if (do_repmovsb) { + // Weight rep;mov more heavily. + for (int i = 0; i < 3; i++) { + v.push_back(MalignBuffer::kRepMov); + } + } + if (do_sse_128_memcpy) v.push_back(MalignBuffer::kSseBy128); + if (do_avx_256_memcpy) v.push_back(MalignBuffer::kAvxBy256); + if (do_avx_512_memcpy) v.push_back(MalignBuffer::kAvxBy512); + size_t k = std::uniform_int_distribution(0, v.size() - 1)(rndeng_); + return v[k]; +} + +MalignBuffer::PunchedHole Worker::PunchedHole(size_t bufsize) { + MalignBuffer::PunchedHole hole; + hole.length = + std::uniform_int_distribution( + 1, std::min(bufsize, 8192))(rndeng_); + hole.start = + std::uniform_int_distribution( + 0, bufsize - hole.length)(rndeng_); + return hole; +} + +std::string OpenSSL_Hash(const MalignBuffer &s, const EVP_MD *type) { + EVP_MD_CTX *ctx; + ctx = EVP_MD_CTX_create(); + EVP_DigestInit_ex(ctx, type, nullptr); + std::string hash; + hash.resize(EVP_MD_CTX_size(ctx)); + InitializeMemoryForSanitizer(hash.data(), EVP_MD_CTX_size(ctx)); + EVP_DigestUpdate(ctx, s.data(), s.size()); + EVP_DigestFinal_ex(ctx, (uint8_t *)&hash[0], nullptr); + EVP_MD_CTX_destroy(ctx); + return HexStr(hash); +} + +void Worker::Run() { + // Array of hash/checksum routines. 
+ typedef struct { + const char *name; + std::string (*func)(const MalignBuffer &); + } hashItem; + std::vector hashers = { + { + "MD5", + [](const MalignBuffer &s) -> std::string { + return OpenSSL_Hash(s, EVP_md5()); + }, + }, + { + "SHA1", + [](const MalignBuffer &s) -> std::string { + return OpenSSL_Hash(s, EVP_sha1()); + }, + }, + { + "SHA256", + [](const MalignBuffer &s) -> std::string { + return OpenSSL_Hash(s, EVP_sha256()); + }, + }, + { + "SHA512", + [](const MalignBuffer &s) -> std::string { + return OpenSSL_Hash(s, EVP_sha512()); + }, + }, + { + "ADLER32", // exported by zlib + [](const MalignBuffer &s) -> std::string { + uLong c = adler32(0, Z_NULL, 0); + c = adler32(c, (const Bytef *)s.data(), s.size()); + return HexData((const char *)&c, sizeof(c)); + }, + }, + { + "CRC32", // exported by zlib. + [](const MalignBuffer &s) -> std::string { + uLong c = crc32(0, Z_NULL, 0); + c = crc32(c, (const Bytef *)s.data(), s.size()); + return HexData((const char *)&c, sizeof(c)); + }, + }, + { + "CRC32C", // crc32 instruction on SSSE3 + [](const MalignBuffer &s) -> std::string { + uint32_t c = crc32c(s.data(), s.size()); + return HexData((const char *)&c, sizeof(c)); + }, + }, + { + "FarmHash64", // Google farmhash + [](const MalignBuffer &s) -> std::string { + uint64_t c = util::Hash64(s.data(), s.size()); + return HexData((const char *)&c, sizeof(c)); + }, + }, + }; + + // Array of compression routines. 
+ typedef struct { + const char *name; + int (*enc)(MalignBuffer *, const MalignBuffer &); + int (*dec)(MalignBuffer *, const MalignBuffer &); + } compressorItem; + std::vector compressors = { + { + "ZLIB", + [](MalignBuffer *o, const MalignBuffer &s) { + uLongf olen = compressBound(s.size()); + o->resize(olen); + int err = compress2((Bytef *)o->data(), &olen, (Bytef *)s.data(), + s.size(), Z_BEST_SPEED); + if (err != Z_OK) { + LOG(DEBUG) << "zlib compression failed: " << err + << " srclen: " << s.size() + << " destlen: " << o->size(); + return err; + } + o->resize(olen); + return 0; + }, + [](MalignBuffer *o, const MalignBuffer &s) { + uLongf olen = o->size(); + int err = uncompress((Bytef *)o->data(), &olen, (Bytef *)s.data(), + s.size()); + if (err != Z_OK) { + LOG(DEBUG) << "zlib decompression failed: " << err + << " srclen: " << s.size() + << " destlen: " << o->size(); + return err; + } + o->resize(olen); + return 0; + }, + }, + }; + + // Choose generator and compressor uniformly. + std::uniform_int_distribution gen_dist( + 0, do_provenance ? 0 : kGenerators.size() - 1); + auto Gen = std::bind(gen_dist, rndeng_); + std::uniform_int_distribution comp_dist(0, compressors.size() - 1); + auto Comp = std::bind(comp_dist, rndeng_); + + // Run one randomly-chosen hash routine each round. + size_t hash_choice; + std::string hash_value; + + EVP_CIPHER_CTX *cipher_ctx; + cipher_ctx = EVP_CIPHER_CTX_new(); + + // Creates FVT controller if we can do so. + if (do_fvt) { + fvt_controller_ = FVTController::Create(tid_); + fvt_controller_->SetCurrentFreqLimitMhz(fvt_controller_->limit_mHz()); + fvt_controller_->ControlFastStringOps(do_fast_string_ops); + LOG(INFO) << "Tid: " << tid_ + << " Enables: " << fvt_controller_->InterestingEnables(); + } + + // MalignBuffers are allocated once if !do_madvise. Otherwise they are + // reallocated each iteration of the main loop, creating much more memory + // allocator work, which may itself exhibit, and suffer from, CPU defects. 
+ std::unique_ptr b; + + uint64_t expected_slot_count = 0; + + Avx avx; + + while (!exiting) { + if (std::thread::hardware_concurrency() > 1) { + if (!SetAffinity(tid_)) { + LOG(WARN) << "Couldnt run on " << tid_ << " sleeping a bit"; + sleep(30); + continue; + } + } + round_++; + if (!b) { + b.reset(new BufferSet); + } + + if (do_noise) { + NoiseScheduler::BlockUntilOn(); + } + + const int turbo_mhz = ScheduledMHz(); // 0 if no FVT. + if (fvt_controller_ != nullptr) { + fvt_controller_->SetCurrentFreqLimitMhz(turbo_mhz); + fvt_controller_->MonitorFrequency(); + } + + auto Turbo = [&turbo_mhz](){ + return Json("turbo", turbo_mhz); + }; + + auto Tid = [this](int tid) { + return "{ " + Json("tid", tid) + (do_fvt ? ", " + FVT() : "") + " }"; + }; + + auto Writer = [this, Tid](){ + return "\"writer\": " + Tid(tid_); + }; + + LOG_EVERY_N_SECS(INFO, 30) << Jstat( + Json("elapsed_s", static_cast(TimeInSeconds() - t0)) + ", " + + Json("failures", errorCount.load()) + ", " + + Json("successes", successCount.load()) + ", " + Writer() + + (fvt_controller_ != nullptr + ? 
", " + Json("meanFreq", fvt_controller_->GetMeanFreqMhz()) + + ", " + Json("maxFreq", fvt_controller_->max_mHz()) + : "")); + + if (do_avx_heavy) { + const std::string e = avx.MaybeGoHot(); + if (!e.empty()) { + LOG(ERROR) << Jfail(e, Writer() + ", " + Turbo()); + errorCount++; + } + } + +#ifdef USE_BORINGSSL + if (do_ssl_self_check && BORINGSSL_self_test() == 0) { + LOG(ERROR) << Jfail("BORINGSSL_self_test", Writer() + ", " + Turbo()); + errorCount++; + continue; + } +#endif + + const bool madvise = + do_madvise && std::uniform_int_distribution(0, 1)(rndeng_); + + const size_t bufsize = + std::uniform_int_distribution(kBufMin, kBufMax)(rndeng_); + auto &gen = kGenerators[Gen()]; + auto &comp = compressors[Comp()]; + + const MalignBuffer::PunchedHole hole = PunchedHole(bufsize); + + const MalignBuffer::CopyMethod copy_method = CopyMethod(); + + const bool use_repstos = + do_repstosb && std::uniform_int_distribution(0, 1)(rndeng_); + + auto BlockSummary = [&]() { + std::stringstream block_summary; + block_summary + << Json("pattern", gen.name) << ", " + << Json("copy", MalignBuffer::ToString(copy_method)) << ", " + << Json("memset", use_repstos ? 
"rep;sto" : "memset") << ", " + << JsonBool("madvise", madvise) << ", " + << Json("size", bufsize) << ", " + << Json("pid", pid_) << ", " + << Json("round", round_) << ", " + << hole.ToString(); + return block_summary.str(); + }; + + auto WriterInfo = [&](){ + return Writer() + ", " + Turbo() + ", " + BlockSummary(); + }; + + if (round_ > 1) { + uint64_t slots_read = 0; + uint64_t errs_this_round = 0; + const uint64_t kErrLimit = 20; + for (size_t k = 0; k < Silkscreen::kSize; k++) { + const std::string err = silkscreen_->CheckMySlot(tid_, round_ - 1, k); + if (!err.empty()) { + slots_read++; + if (err != "=") { + errs_this_round++; + if (errs_this_round <= kErrLimit) { + errorCount++; + LOG(ERROR) << Jfail("Silkscreen", JsonRecord("syndrome", err) + + ", " + WriterInfo()); + } else { + // When Silkscreen fails, it often fails, on several bad machines, + // in a surprising way: all slots owned by reading tid are + // are corrupt, in a way that suggests the previous round's + // writes never happened. Weird, and deserves some study, but + // meanwhile the log spew is suppressed. + } + } + } + } + if (errs_this_round > kErrLimit) { + LOG(ERROR) << Jfail( + "Silkscreen", + JsonRecord("syndrome", Json("many_errors", errs_this_round) + ", " + + Json("slots_read", slots_read)) + + ", " + WriterInfo()); + errorCount++; + } + if (expected_slot_count != slots_read) { + LOG(ERROR) << Jfail("Silkscreen", + Json("read", slots_read) + ", " + + Json("expected", expected_slot_count) + ", " + + WriterInfo()); + errorCount++; + } + } + const uint64_t slots_written = silkscreen_->WriteMySlots(tid_, round_); + if (!expected_slot_count) { + expected_slot_count = slots_written; + } + if (expected_slot_count != slots_written) { + LOG(ERROR) << Jfail("Silkscreen", + Json("written", slots_written) + ", " + + Json("expected", expected_slot_count) + ", " + + WriterInfo()); + errorCount++; + } + + if (do_avx_heavy) { + // If we tried to do AVX heavy stuff. 
Try to run AVX heavy again to try + // to spike current. + const std::string e = avx.BurnIfAvxHeavy(); + if (!e.empty()) { + LOG(ERROR) << Jfail(e, Writer() + ", " + Turbo()); + errorCount++; + } + } + + const auto buffer_seed = rndeng_(); + if (!b->original) b->Alloc(&b->original); + b->original->Initialize(Alignment(), bufsize); + const FloatingPointResults f = + GenerateData(gen, hole, buffer_seed, + copy_method, use_repstos, b->original.get()); + MaybeFlush(*b->original); + + MalignBuffer* head = b->original.get(); + + if (do_hashes) { + hash_choice = + std::uniform_int_distribution(0, hashers.size() - 1)(rndeng_); + hash_value = hashers[hash_choice].func(*head); + } + + if (do_compress) { + // Run our randomly chosen compressor. + if (!b->compressed) b->Alloc(&b->compressed); + b->compressed->Initialize(Alignment(), bufsize); + MaybeFlush(*b->compressed); + + const int err = comp.enc(b->compressed.get(), *head); + LOG(DEBUG) << WriterInfo() + << " original->size(): " << head->size() + << ", compressed.size(): " << b->compressed->size() << "."; + if (err) { + LOG(ERROR) << Jfail("Compression", + Json("syndrome", err) + ", " + WriterInfo()); + errorCount++; + continue; + } + MaybeFlush(*b->compressed); + head = b->compressed.get(); + LOG(DEBUG) << WriterInfo() << "compress done."; + if (exiting) break; + } + + const unsigned char key[33] = "0123456789abcdef0123456789abcdef"; + const std::string ivec(b->original->data(), kBufMin); + unsigned char gmac[16]; + + const MalignBuffer* const unencrypted = head; + if (do_encrypt) { + // Encrypt. 
+ if (!b->encrypted) b->Alloc(&b->encrypted); + b->encrypted->Initialize(Alignment(), head->size()); + MaybeFlush(*b->encrypted); + int enc_len = 0, enc_unused_len = 0; + EVP_CipherInit_ex(cipher_ctx, EVP_aes_256_gcm(), NULL, key, + (unsigned char *)ivec.data(), 1); + + if (madvise) MadviseDontNeed(*b->encrypted); + if (EVP_CipherUpdate( + cipher_ctx, (unsigned char *)b->encrypted->data(), &enc_len, + (unsigned char *)head->data(), head->size()) != 1) { + LOG(ERROR) << Jfail("EVP_CipherUpdate", WriterInfo()); + errorCount++; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + continue; + } + if (EVP_CipherFinal_ex(cipher_ctx, nullptr, &enc_unused_len) != 1) { + LOG(ERROR) << Jfail("encrypt_EVP_CipherFinal_ex", WriterInfo()); + errorCount++; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + continue; + } + enc_len += enc_unused_len; + if (enc_len != (int)b->encrypted->size()) { + std::stringstream ss; + ss << "enc_length: " << enc_len << " vs: " << b->encrypted->size(); + LOG(ERROR) << Jfail("encrypt_length_mismatch", + Json("syndrome", ss.str()) + ", " + WriterInfo()); + errorCount++; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + continue; + } + if (EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_GCM_GET_TAG, sizeof(gmac), + gmac) != 1) { + LOG(ERROR) << Jfail("EVP_CTRL_GCM_GET_TAG", WriterInfo()); + errorCount++; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + continue; + } + EVP_CIPHER_CTX_cleanup(cipher_ctx); + MaybeFlush(*b->encrypted); + head = b->encrypted.get(); + LOG(DEBUG) << "Encrypt done " << WriterInfo(); + if (exiting) break; + } + + // Make a copy. 
+ if (!b->copied) b->Alloc(&b->copied); + b->copied->Initialize(Alignment(), head->size()); + MaybeFlush(*b->copied); + if (madvise) MadviseDontNeed(*b->copied); + std::string syndrome = b->copied->CopyFrom(*head, copy_method); + + if (!syndrome.empty()) { + LOG(ERROR) << Jfail( + "writer-detected-copy", + JsonRecord("syndrome", syndrome) + ", " + WriterInfo()); + errorCount++; + continue; + } + MaybeFlush(*b->copied); + + // Switch to an alternate CPU. + + int newcpu = tid_; + if (do_hop && std::thread::hardware_concurrency() > 1) { + std::vector cpus; + for (int i : tid_list_) { + if (i == tid_) continue; + cpus.push_back(i); + } + if (!cpus.empty()) { + int cpuoff = + std::uniform_int_distribution(0, cpus.size() - 1)(rndeng_); + newcpu = cpus[cpuoff]; + cpus.erase(cpus.begin() + cpuoff); + if (!SetAffinity(newcpu)) { + // Tough luck, can't run on chosen CPU. + // Validate on same cpu we were on. + newcpu = tid_; + } + } + } + + auto Reader = [&Tid, &newcpu](){ + return "\"reader\": " + Tid(newcpu); + }; + auto WriterReaderInfo = [&](){ + return + Writer() + ", " + Reader() + ", " + Turbo() + ", " + BlockSummary(); + }; + + // Re-verify buffer copy + syndrome = b->copied->Syndrome(*head); + if (!syndrome.empty()) { + LOG(ERROR) << Jfail( + "copy", JsonRecord("syndrome", syndrome) + ", " + WriterInfo()); + errorCount++; + continue; + } + + MaybeFlush(*b->copied); + head = b->copied.get(); + + if (do_encrypt) { + // Decrypt. 
+ if (!b->decrypted) b->Alloc(&b->decrypted); + b->decrypted->Initialize(Alignment(), head->size()); + MaybeFlush(*b->decrypted); + + int dec_len = 0, dec_extra_len = 0; + EVP_CipherInit_ex(cipher_ctx, EVP_aes_256_gcm(), NULL, key, + (unsigned char *)ivec.data(), 0); + if (madvise) MadviseDontNeed(*b->decrypted); + if (EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_GCM_SET_TAG, sizeof(gmac), + gmac) != 1) { + LOG(ERROR) << Jfail("EVP_CTRL_GCM_SET_TAG", WriterReaderInfo()); + errorCount++; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + continue; + } + if (EVP_CipherUpdate( + cipher_ctx, (unsigned char *)b->decrypted->data(), &dec_len, + (unsigned char *)head->data(), head->size()) != 1) { + LOG(ERROR) << Jfail("decryption", WriterReaderInfo()); + errorCount++; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + continue; + } + if (EVP_CipherFinal_ex( + cipher_ctx, (unsigned char *)(b->decrypted->data() + dec_len), + &dec_extra_len) != 1) { + LOG(ERROR) << Jfail("decrypt_EVP_CipherFinal_ex", WriterReaderInfo()); + errorCount++; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + continue; + } + dec_len += dec_extra_len; + EVP_CIPHER_CTX_cleanup(cipher_ctx); + if (dec_len != (int)b->decrypted->size()) { + std::stringstream ss; + ss << "dec_length: " << dec_len << " vs: " << b->decrypted->size(); + LOG(ERROR) << Jfail("decrypt_length_mismatch", + Json("syndrome", ss.str()) + ", " + + WriterReaderInfo()); + errorCount++; + continue; + } + MaybeFlush(*b->decrypted); + head = b->decrypted.get(); + syndrome = unencrypted->Syndrome(*head); + if (!syndrome.empty()) { + LOG(ERROR) << Jfail( + "decryption_mismatch", + JsonRecord("syndrome", syndrome) + ", " + WriterReaderInfo()); + errorCount++; + continue; + } + LOG(DEBUG) << WriterReaderInfo() << " decrypt done"; + if (exiting) break; + } + + if (do_compress) { + // Run decompressor. 
+ if (!b->decompressed) b->Alloc(&b->decompressed); + b->decompressed->Initialize(Alignment(), bufsize); + MaybeFlush(*b->decompressed); + + if (madvise) MadviseDontNeed(*b->decompressed); + const int err = comp.dec(b->decompressed.get(), *head); + if (err) { + LOG(ERROR) << Jfail("uncompression", + Json("syndrome", err) + ", " + WriterReaderInfo()); + errorCount++; + continue; + } + if (b->decompressed->size() != bufsize) { + std::stringstream ss; + ss << "dec_length: " << b->decompressed->size() << " vs: " << bufsize; + LOG(ERROR) << Jfail( + "decompressed_size", + Json("syndrome", ss.str()) + ", " + WriterReaderInfo()); + errorCount++; + continue; + } + MaybeFlush(*b->decompressed); + head = b->decompressed.get(); + LOG(DEBUG) << WriterReaderInfo() << " uncompress done"; + } + + if (!b->re_made) b->Alloc(&b->re_made); + b->re_made->Initialize(Alignment(), bufsize); + const FloatingPointResults f_r = + GenerateData(gen, hole, buffer_seed, + copy_method, use_repstos, b->re_made.get()); + syndrome = b->original->Syndrome(*b->re_made); + + if (!syndrome.empty()) { + LOG(ERROR) << Jfail("re-make", JsonRecord("syndrome", syndrome) + ", " + + WriterReaderInfo()); + errorCount++; + continue; + } + + if (f != f_r) { + std::stringstream ss; + ss << "Was: " << f.d << " Is: " << f_r.d; + LOG(ERROR) << Jfail( + "fp-double", Json("syndrome", ss.str()) + ", " + WriterReaderInfo()); + errorCount++; + continue; + } + + if (do_hashes) { + // Re-run hash func. + std::string hash = hashers[hash_choice].func(*head); + if (hash_value != hash) { + std::stringstream ss; + ss << "hash was: " << hash_value << " is: " << hash; + LOG(ERROR) << Jfail( + "hash", Json("syndrome", ss.str()) + ", " + WriterReaderInfo()); + errorCount++; + continue; + } + LOG(DEBUG) << WriterReaderInfo() << " rehash done"; + } + + // Release MalignBuffer memory allocations. 
+ if (do_madvise) b.reset(nullptr); + + successCount++; + } + EVP_CIPHER_CTX_free(cipher_ctx); + LOG(INFO) << "tid " << tid_ << " exiting."; +} + +static void UsageIf(bool v) { + if (!v) return; + LOG(ERROR) << "Usage cpu_check [-a] [-b] [-c] [-d] [-e] [-F] [-h]" + << " [-m] [-nN] [-p] [-qNNN] [-r] [-x] [-X] [-s] [-Y] [-H] [-kXXX]" + << " [-z] [-cn,n,...,n] [-fMinF[-MaxF]] [-tNNN] [-u]" + << "\n a: Do not misalign" + << "\n b: Do not run BoringSSL self check" + << "\n c: Explicit list of CPUs" + << "\n d: Do not rep stosb" + << "\n e: Do not encrypt" + << "\n f: Fixed specified turbo frequency (multiple of 100)" + << "\n g: Do not touch frequency, voltage and thermal controls" + << "\n F: Randomly flush caches (inverted option)" + << "\n h: Do not hash" + << "\n m: Do not madvise, do not malloc per iteration" + << "\n l: Do not provoke heavy AVX power fluctuations" + << "\n n: Generate noise" + << "\n N: Generate noise, invert -c" + << "\n p: Corrupt data provenance" + << "\n q: Quit if more than N errors" + << "\n r: Do not repmovsb" + << "\n t: Timeout in seconds" + << "\n x: Do not use AVX:256" + << "\n X: Do use AVX512" + << "\n s: Do not switch CPUs for verification" + << "\n u: Do not use fast string ops" + << "\n Y: Do frequency sweep" + << "\n H: Slam between low and high frequency" + << "\n k: Frequency step period (default 300)" + << "\n z: Do not compress/uncompress"; + exit(2); +} + +int main(int argc, char **argv) { + std::vector tid_list; + int64_t timeout = 0; + +#ifdef IN_GOOGLE3 + // Initialize the symbolizer to get a human-readable stack trace. 
+ absl::InitializeSymbolizer(argv[0]); + absl::FailureSignalHandlerOptions options; + absl::InstallFailureSignalHandler(options); +#endif + + for (int i = 1; i < argc; i++) { + const char *flag = argv[i]; + UsageIf(flag[0] != '-'); + for (flag++; *flag != 0; flag++) { + switch (*flag) { + case 'a': + do_misalign = false; + break; + case 'b': + do_ssl_self_check = false; + break; + case 'c': + { + std::string c = ""; + for (flag++; *flag != 0; flag++) { + c += *flag; + } + std::stringstream s(c); + int t; + while (s >> t) { + tid_list.push_back(t); + if (s.peek() == ',') s.ignore(); + } + } + break; + case 'd': + do_repstosb = false; + break; + case 'e': + do_encrypt = false; + break; + case 'f': + { + std::string c(++flag); + flag += c.length(); + std::stringstream s(c); + s >> fixed_min_frequency; + fixed_max_frequency = fixed_min_frequency; + if (s.get() == '-') { + s >> fixed_max_frequency; + do_freq_sweep = true; + } + } + break; + case 'F': + do_flush = true; + break; + case 'g': + do_fvt = false; + break; + case 'H': + do_freq_hi_lo = true; + break; + case 'h': + do_hashes = false; + break; + case 'l': + do_avx_heavy = false; + break; + case 'm': + do_madvise = false; + break; + case 'n': + do_noise = true; + break; + case 'N': + do_noise = true; + do_invert_cores = true; + do_encrypt = false; + do_hashes = false; + do_compress = false; + do_madvise = false; + do_hop = false; + break; + case 'p': + do_provenance = true; + do_encrypt = false; + do_hashes = false; + do_compress = false; + break; + case 'q': + { + std::string c(++flag); + flag += c.length(); + std::stringstream s(c); + s >> error_limit; + } + break; + case 'r': + do_repmovsb = false; + break; + case 's': + do_hop = false; + break; + case 'u': + do_fast_string_ops = false; + break; + case 'x': + do_avx_256_memcpy = false; + break; + case 'X': + do_avx_512_memcpy = true; + break; + case 'Y': + do_freq_sweep = true; + break; + case 'k': + { + std::string c(++flag); + flag += c.length(); + 
std::stringstream s(c); + s >> seconds_per_freq; + } + break; + case 't': + { + std::string c(++flag); + flag += c.length(); + std::stringstream s(c); + s >> timeout; + } + break; + case 'z': + do_compress = false; + break; + default: + UsageIf(true); + } + if (*flag == 0) break; + } + } + + LOG(INFO) << "Starting " << argv[0] << " version " cpu_check_VERSION + << (do_misalign ? "" : " No misalign ") + << (do_repstosb ? "" : " No repstosb") + << (!do_flush ? "" : " Cache-line flush ") + << (do_encrypt ? "" : " No encryption ") + << (do_hashes ? "" : " No hash ") + << (do_madvise ? "" : " No madvise ") + << (do_provenance ? " Provenance " : "") + << (do_repmovsb ? "" : " No repmovsb ") + << (do_sse_128_memcpy ? "" : " No SSE:128 ") + << (do_avx_256_memcpy ? "" : " No AVX:256 ") + << (do_avx_512_memcpy ? "" : " No AVX:512 ") + << (do_avx_heavy ? " AVX_heavy " : "") + << (do_compress ? "" : " No compression ") + << (do_hop ? "" : " No thread_switch ") + << (do_ssl_self_check ? "" : " No BoringSSL self check") + << (!do_freq_sweep ? "" : " FrequencySweep ") + << (!do_freq_hi_lo ? "" : " FreqHiLo ") + << (do_noise ? " NOISE" : "") + << (do_fast_string_ops ? "" : " No FastStringOps"); + + std::vector threads; + std::vector workers; + std::vector words = ReadDict(); + if (words.empty()) { + LOG(ERROR) << "No word list found."; + exit(1); + } + int cpus = std::thread::hardware_concurrency(); + LOG(INFO) << "Detected hardware concurrency: " << cpus; + + if (do_invert_cores) { + // Clumsily complement the tid_list in -N mode. + std::vector v; + for (int i = 0; i < cpus; i++) { + if (std::find(tid_list.begin(), tid_list.end(), i) == tid_list.end()) { + v.push_back(i); + } + } + tid_list = v; + } + + if (tid_list.empty()) { + for (int i = 0; i < cpus; i++) { + tid_list.push_back(i); + } + } else { + for (int t : tid_list) { + LOG(INFO) << "Explicitly testing cpu: " << t; + } + } + + + // Silkscreen instance shared by all threads. 
+ Silkscreen silkscreen(tid_list); + + for (int tid : tid_list) { + workers.push_back(new Worker(getpid(), &words, tid_list, tid, &silkscreen)); + threads.push_back(new std::thread(&Worker::Run, workers.back())); + } + signal(SIGTERM, [](int) { exiting = true; }); + signal(SIGINT, [](int) { exiting = true; }); + + struct timeval last_cpu = {0, 0}; + double last_time = t0; + while (!exiting) { + sleep(60); + struct rusage ru; + double secs = TimeInSeconds(); + if (timeout > 0 && secs >= t0 + timeout) { + exiting = true; + } + double secondsPerError = (secs - t0) / errorCount.load(); + if (getrusage(RUSAGE_SELF, &ru) == -1) { + LOG(ERROR) << "getrusage failed: " << strerror(errno); + } else { + float cpu = (((ru.ru_utime.tv_sec - last_cpu.tv_sec) * 1000000.0) + + (ru.ru_utime.tv_usec - last_cpu.tv_usec)) / + 1000000.0; + LOG(INFO) << "Errors: " << errorCount.load() + << " Successes: " << successCount.load() + << " CPU " << cpu / (secs - last_time) << " s/s" + << " Seconds Per Error: " << secondsPerError; + last_cpu = ru.ru_utime; + } + last_time = secs; + } + + // shutting down. + for (auto &t : threads) { + t->join(); + delete t; + } + for (auto w : workers) { + delete w; + } + LOG(ERROR) << errorCount.load() << " ERRORS, " << successCount.load() + << " SUCCESSES."; + LOG(INFO) << "Exiting."; + exit(errorCount != 0); +} diff --git a/crc32c.c b/crc32c.c new file mode 100644 index 0000000..bcb266d --- /dev/null +++ b/crc32c.c @@ -0,0 +1,75 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "crc32c.h" + +#ifdef __SSE4_2__ +#include + +/* x86 version based using instrinsics */ + +uint32_t crc32c_hw(const char *src, size_t len) { + const unsigned char *s = (unsigned char *)src; + uint64_t hh = ~0; +#ifdef __x86_64__ + while (len > 7) { + uint64_t v = *(uint64_t *)s; + hh = _mm_crc32_u64(hh, v); + s += 8; + len -= 8; + } +#endif /* __x86_64__ */ + uint32_t h = (uint32_t)hh; + if (len > 3) { + uint32_t v = *(uint32_t *)s; + h = _mm_crc32_u32(h, v); + s += 4; + len -= 4; + } + if (len > 1) { + uint16_t v = *(uint16_t *)s; + h = _mm_crc32_u16(h, v); + s += 2; + len -= 2; + } + if (len > 0) { + uint8_t v = *(uint8_t *)s; + h = _mm_crc32_u8(h, v); + s += 1; + len -= 1; + } + return ~h; +} +#endif /* __SSE4_2__ */ + +/* CRC-32C (iSCSI) polynomial in reversed bit order. */ +#define POLY 0x82f63b78 + +uint32_t crc32c_sw(const char *src, size_t len) { + const unsigned char *s = (unsigned char *)src; + uint32_t h = ~0; + while (len--) { + h ^= *s++; + for (int k = 0; k < 8; k++) h = h & 1 ? (h >> 1) ^ POLY : h >> 1; + } + return ~h; +} + +uint32_t crc32c(const char *src, size_t len) { +#ifdef __SSE4_2__ + return crc32c_hw(src, len); +#else + return crc32c_sw(src, len); +#endif +} diff --git a/crc32c.h b/crc32c.h new file mode 100644 index 0000000..1ac3fcb --- /dev/null +++ b/crc32c.h @@ -0,0 +1,27 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +uint32_t crc32c(const char *s, size_t len); + +#ifdef __cplusplus +} +#endif diff --git a/crc32c_test.cc b/crc32c_test.cc new file mode 100644 index 0000000..eb32469 --- /dev/null +++ b/crc32c_test.cc @@ -0,0 +1,52 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include + +#include "crc32c.h" + +extern "C" { +uint32_t crc32c_hw(const char *, size_t); +uint32_t crc32c_sw(const char *, size_t); +} + +const int MINSIZE = 1; +const int MAXSIZE = 1048576; + +int main(int argc, char **argv) { +#if defined(__x86_64__) || defined(__i386__) + std::knuth_b rndeng((std::random_device()())); + std::uniform_int_distribution size_dist(MINSIZE, MAXSIZE); + std::uniform_int_distribution d_dist(0, 255); + std::string buf; + for (int i = 0; i < 100; i++) { + size_t len = size_dist(rndeng); + buf.resize(len); + for (size_t j = 0; j < len; j++) { + buf[j] = d_dist(rndeng); + } + uint32_t crc_hw = crc32c_hw(buf.data(), len); + uint32_t crc_sw = crc32c_sw(buf.data(), len); + if (crc_hw != crc_sw) { + fprintf(stderr, "crc mismatch: hw 0x%08x vs sw 0x%08x buffer len %ld\n", + crc_hw, crc_sw, len); + } + buf.clear(); + } +#endif // defined(__x86_64__) || defined(__i386__) + return 0; +} diff --git a/fvt_controller.cc b/fvt_controller.cc new file 
mode 100644 index 0000000..c9329e4 --- /dev/null +++ b/fvt_controller.cc @@ -0,0 +1,127 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "config.h" +#include "fvt_controller.h" + +#include +#include +#include +#include + +#include +#undef NDEBUG +#include +#include +#include +#include + +#include "fvt_controller.h" +#include "log.h" + +namespace { + +class NonX86FVTController : public FVTController { + public: + explicit NonX86FVTController(int cpu) : FVTController(cpu) {} + ~NonX86FVTController() override {} + void SetCurrentFreqLimitMhz(int mhz) override { + LOG(FATAL) << "Unsupported platform"; + } + // Returns the maximum supported CPU frequency. + int GetAbsoluteFreqLimitMhz() override { + LOG(FATAL) << "Unsupported platform"; + return 0; + } + // Returns true if automatic Power Management enabled. 
+ bool PowerManaged() const override { + LOG(FATAL) << "Unsupported platform"; + return false; + } + std::string FVT() override { + LOG(FATAL) << "Unsupported platform"; + return ""; + } + std::string InterestingEnables() const override { + LOG(FATAL) << "Unsupported platform"; + return ""; + } + void ControlFastStringOps(bool enable) override { + LOG(FATAL) << "Unsupported platform"; + } + + protected: + int GetCurrentFreqLimitMhz() override { + LOG(FATAL) << "Unsupported platform"; + return 0; + } + int GetCurrentFreqMhz() override { + LOG(FATAL) << "Unsupported platform"; + return 0; + } +}; + +static const char IntelVendorString[] = "GenuineIntel"; +static const char AMDVendorString[] = "AuthenticAMD"; + +} // namespace + +// Only works for Linux on x86-64 +void X86FVTController::GetCPUId(int cpu, uint32_t eax, CPUIDResult* result) { + constexpr size_t kCPUIDPathMax = 1024; + char CPUIDPath[kCPUIDPathMax]; + snprintf(CPUIDPath, sizeof(CPUIDPath), "/dev/cpu/%d/cpuid", cpu); + int fd = open(CPUIDPath, O_RDONLY); + assert(fd >= 0); + ssize_t byte_read = pread(fd, result, sizeof(*result), eax); + if (byte_read != sizeof(*result)) { + LOG(FATAL) << "CPUID " << std::hex << eax << "failed."; + } + close(fd); +} + +std::string X86FVTController::CPUIDVendorStringUncached() { + char buffer[12]; + CPUIDResult result; + GetCPUId(0, 0, &result); + memcpy(buffer + 0, &result.ebx, 4); + memcpy(buffer + 4, &result.edx, 4); + memcpy(buffer + 8, &result.ecx, 4); + return std::string(buffer, sizeof(buffer)); +} + +std::string X86FVTController::CPUIDVendorString() { + static const std::string vendor_string = CPUIDVendorStringUncached(); + return vendor_string; +} + +std::unique_ptr FVTController::Create(int cpu) { +#if defined(__i386__) || defined(__x86_64__) + const std::string vendor_string = X86FVTController::CPUIDVendorString(); +#ifdef VENDORS_INTEL_PATH + if (vendor_string == IntelVendorString) { + return NewIntelFVTController(cpu); + } +#endif +#ifdef VENDORS_AMD_PATH + if 
(vendor_string == AMDVendorString) { + return NewAMDFVTController(cpu); + } +#endif + LOG(FATAL) << "Unsupported x86 vendor"; + return nullptr; +#else + return std::unique_ptr(new NonX86FVTController(cpu)); +#endif +} diff --git a/fvt_controller.h b/fvt_controller.h new file mode 100644 index 0000000..8992e83 --- /dev/null +++ b/fvt_controller.h @@ -0,0 +1,164 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef FVT_CONTROLLER_ +#define FVT_CONTROLLER_ + +#include +#include + +#include +#include + +#include "log.h" +#include "utils.h" + +// Frequency, Voltage and Thermal (FVT) controller. +class FVTController { + public: + static constexpr int kMinTurboMHz = 1000; + + protected: + explicit FVTController(int cpu) : cpu_(cpu) { + ResetFrequencyMeter(); + } + + public: + virtual ~FVTController() {} + + static std::unique_ptr Create(int cpu); + + // Monitor per-cpu (or core) frequency control. + void MonitorFrequency() { + const double t = TimeInSeconds(); + const int f = GetCurrentFreqMhz(); + sum_mHz_ += (t - previous_sample_time_) * f; + previous_sample_time_ = t; + + const int mHz = GetCurrentFreqLimitMhz(); + if (mHz != max_mHz_) { + LOG_EVERY_N_SECS(INFO, 10) << "Cpu: " << cpu_ + << " max turbo frequency control changed to: " << mHz; + max_mHz_ = mHz; + } + } + + // Set the current CPU frequency limit. Warning: do this to both threads of + // HT pair. Requires 'mhz' multiple of 100, within legitimate range. 
+ virtual void SetCurrentFreqLimitMhz(int mhz) = 0; + + // Returns the absolute maximum CPU frequency. + virtual int GetAbsoluteFreqLimitMhz() = 0; + + // Dont put much stock in this method, it's probably a lousy way to do things. + int GetMeanFreqMhz() const { + return sum_mHz_ / (previous_sample_time_ - t0_); + } + + int max_mHz() const { return max_mHz_; } + + int limit_mHz() const { return limit_mhz_; } + + // Returns true if automatic Power Management enabled. + virtual bool PowerManaged() const = 0; + + // Returns frequency, thermal, and voltage condition. + virtual std::string FVT() = 0; + + virtual std::string InterestingEnables() const = 0; + + // TODO: separate this from FVT controller. + virtual void ControlFastStringOps(bool enable) = 0; + + protected: + // Returns the current CPU frequency limit in MHz. + virtual int GetCurrentFreqLimitMhz() = 0; + + // Returns the current CPU frequency in MHz. + virtual int GetCurrentFreqMhz() = 0; + + void ResetFrequencyMeter() { + t0_ = TimeInSeconds(); + previous_sample_time_ = t0_; + sum_mHz_ = 0.0; + } + + const int cpu_; + double t0_ = 0.0; + int limit_mhz_ = 0; // const after init + int max_mHz_ = 0; + double sum_mHz_ = 0.0; + double previous_sample_time_ = 0.0; +}; + +class X86FVTController : public FVTController { + public: + struct CPUIDResult { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + }; + + explicit X86FVTController(int cpu) : FVTController(cpu) { + std::stringstream dev; + dev << "/dev/cpu/" << cpu << "/msr"; + fd_ = open(dev.str().c_str(), O_RDWR); + if (fd_ < 0) { + LOG(ERROR) << "Cannot open: " << dev.str() + << " Running me as root?"; + } + } + ~X86FVTController() override { + if (fd_ >= 0) close(fd_); + } + + // Only works for Linux on x86-64 + static void GetCPUId(int cpu, uint32_t eax, CPUIDResult* result); + + // Return the vendor string from CPUID + static std::string CPUIDVendorString(); + + protected: + uint64_t ReadMsr(uint32_t reg) const { + if (fd_ < 0) return 0; + 
uint64_t v = 0; + int rc = pread(fd_, &v, sizeof(v), reg); + if (rc != sizeof(v)) { + LOG_EVERY_N_SECS(ERROR, 60) << "Unable to read cpu: " << cpu_ + << " reg: " << std::hex << reg; + } + return v; + } + + void WriteMsr(uint32_t reg, uint64_t v) const { + if (fd_ < 0) return; + int rc = pwrite(fd_, &v, sizeof(v), reg); + if (rc != sizeof(v)) { + fprintf(stderr, "rc = %d sizeof(v) = %lu\n", rc, sizeof(v)); + LOG_EVERY_N_SECS(ERROR, 60) << "Unable to write cpu: " << cpu_ + << " reg: " << std::hex << reg; + } + } + + private: + static std::string CPUIDVendorStringUncached(); + + int fd_ = -1; +}; + +extern std::unique_ptr NewAMDFVTController(int cpu); +extern std::unique_ptr NewIntelFVTController(int cpu); + +#endif // FVT_CONTROLLER_ diff --git a/log.h b/log.h new file mode 100644 index 0000000..cf207d3 --- /dev/null +++ b/log.h @@ -0,0 +1,142 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LOG_H_ +#define LOG_H_ + +#include + +#include +#include +#include + +#include +#include +#include +#include + +enum LOG_LEVEL { + DEBUG, + INFO, + WARN, + ERROR, + FATAL, +}; + +// TODO - Flag to filter above a given log level + +// TODO +// Make this more efficient. Eg. 
use: +// LOG_IF_COND(xx) << yy; +// becomes +// for (condvar;COND;) Log(blah).stream() << yy; + +#define LOG(X) Log(__FILE__, __LINE__, X).stream() + +#define LOG_EVERY_N(X, N) \ + static std::atomic __FILE__##__LINE__##_counter(0); \ + Log(__FILE__, __LINE__, X, __FILE__##__LINE__##_counter, N).stream() + +#define LOG_EVERY_N_SECS(X, N) \ + static std::atomic __FILE__##__LINE__##_lasttime(0); \ + Log(__FILE__, __LINE__, X, __FILE__##__LINE__##_lasttime, N).stream() + +// TODO +// #define VLOG(x) + +static LOG_LEVEL min_log_level = INFO; + +class Log { + public: + Log(const char* file, int line, LOG_LEVEL lvl) : lvl_(lvl) { + if (lvl < min_log_level) { + skip_ = true; + return; + } + Init(file, line); + } + // For LOG_EVERY_N: + Log(const char* file, int line, LOG_LEVEL lvl, std::atomic& cnt, + int N) + : lvl_(lvl) { + if (lvl < min_log_level) { + skip_ = true; + return; + } + if ((++cnt % N) != 0) { + skip_ = true; + return; + } + Init(file, line); + } + // For LOG_EVERY_N_SECS: + Log(const char* file, int line, LOG_LEVEL lvl, std::atomic& t, int N) + : lvl_(lvl) { + if (lvl < min_log_level) { + skip_ = true; + return; + } + int64_t now = time(nullptr); + int64_t last = t; + + if (now - last < N || !t.compare_exchange_strong(last, now)) { + skip_ = true; + return; + } + Init(file, line); + } + + ~Log() { + if (skip_) return; + // You might prefer to direct errors to stderr, e.g. + // (lvl_ < WARN ? 
std::cout : std::cerr) << os_.str(); + std::cout << os_.str() << std::endl; + if (lvl_ == FATAL) { + abort(); + } + } + + std::ostream& stream() { return os_; } + + std::ostream& operator<<(const std::string& s) { + if (skip_) return os_; + return os_ << s; + } + + private: + void Init(const char* file, int line) { + static const char l[] = { + 'D', 'I', 'W', 'E', 'F', + }; + struct timeval tvs; + struct tm tms; + time_t t; + char s[17]; + gettimeofday(&tvs, nullptr); + t = tvs.tv_sec; + gmtime_r(&t, &tms); + strftime(s, sizeof(s), "%Y%m%d-%H%M%S.", &tms); + char us[7]; + snprintf(us, sizeof(us), "%06ld", (long)tvs.tv_usec); + + os_ << l[lvl_] << s << us << ' ' << pthread_self() << " " << file << ':' + << line << "] "; + } + + LOG_LEVEL lvl_; + bool skip_ = false; + std::stringstream os_; +}; + +#endif // LOG_H_ diff --git a/third_party/farmhash b/third_party/farmhash new file mode 160000 index 0000000..0d859a8 --- /dev/null +++ b/third_party/farmhash @@ -0,0 +1 @@ +Subproject commit 0d859a811870d10f53a594927d0d0b97573ad06d diff --git a/utils.cc b/utils.cc new file mode 100644 index 0000000..f8e4193 --- /dev/null +++ b/utils.cc @@ -0,0 +1,75 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "utils.h" + +#include +#include + +#include "log.h" + +static const std::string host_name = []() { + char host[256]; + gethostname(host, sizeof(host)); + const std::string h(host); + size_t k = h.find('.'); + if (k == std::string::npos) return h; + return h.substr(0, k); +}(); + +double TimeInSeconds() { + struct timeval tv; + gettimeofday(&tv, nullptr); + return ((tv.tv_sec * 1e6) + tv.tv_usec) / 1e6; +} + +std::string Json(const std::string& field, int v) { + return "\"" + field + "\": " + std::to_string(v); +} + +std::string Json(const std::string& field, uint64_t v) { + return "\"" + field + "\": " + std::to_string(v); +} + +std::string Json(const std::string& field, double v) { + return "\"" + field + "\": " + std::to_string(v); +} + +std::string JsonBool(const std::string& field, bool v) { + return "\"" + field + "\": " + (v ? "true" : "false"); +} + +std::string Json(const std::string& field, const std::string& v) { + return "\"" + field + "\": \"" + v + "\""; +} + +std::string JsonRecord(const std::string& name, const std::string& v) { + return "\"" + name + "\": { " + v + " }"; +} + +// Emits null field. +std::string JsonNull(const std::string& field) { + return "\"" + field + "\": null"; +} + +// Returns host and timestamp fields. ToDo: probably add a run id. +std::string JTag() { + const uint64_t t = TimeInSeconds() * 1e6; + return Json("host", host_name) + ", " + Json("t_us", t); +} + +// Emits a run status record. +std::string Jstat(const std::string& v) { + return "{ " + JsonRecord("stat", v) + ", " + JTag() + " }"; +} diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..53f8f2c --- /dev/null +++ b/utils.h @@ -0,0 +1,39 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef UTILS_H_
#define UTILS_H_

// BUG FIX(review): the original include name was stripped in transit and a
// single header cannot cover both std::string and uint64_t used below;
// reconstructed as the two headers the declarations require.
#include <stdint.h>

#include <string>

// Returns wall-clock time as seconds with microsecond resolution.
double TimeInSeconds();

// JSON field emitters producing '"field": value'. Values are not escaped.
std::string Json(const std::string& field, int v);
std::string Json(const std::string& field, uint64_t v);
std::string Json(const std::string& field, double v);
std::string JsonBool(const std::string& field, bool v);
std::string Json(const std::string& field, const std::string& v);
// Wraps 'v' (already-rendered fields) as a JSON object under key 'name'.
std::string JsonRecord(const std::string& name, const std::string& v);

// Emits null field.
std::string JsonNull(const std::string& field);

// Returns host and timestamp fields. ToDo: probably add a run id.
std::string JTag();

// Emits a run status record.
std::string Jstat(const std::string& v);

#endif  // UTILS_H_

// ------------------------------------------------- vendors/intel/intel.cc ----

// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +// Intel platform specific code + +#include +#include +#include + +#include "../../fvt_controller.h" +#include "../../log.h" + +namespace { + +class IntelFVTController : public X86FVTController { + public: + explicit IntelFVTController(int cpu) + : X86FVTController(cpu) { + limit_mhz_ = GetAbsoluteFreqLimitMhzImpl(); + // Set initial Turbo frequency to max. + SetCurrentMaxFreqMhzImpl(limit_mhz_); + } + + ~IntelFVTController() override { + // Upon exit, set Turbo frequency to max. + SetCurrentMaxFreqMhzImpl(limit_mhz_); + } + + // Sets current maximum frequency. Warning: do this to both threads of HT + // pair. Requires 'mhz' multiple of 100, within legitimate range. + void SetCurrentFreqLimitMhz(int mhz) override { + SetCurrentMaxFreqMhzImpl(mhz); + } + + // Returns absolute maximum CPU frequency. + int GetAbsoluteFreqLimitMhz() override { + return GetAbsoluteFreqLimitMhzImpl(); + } + + // Returns true if automatic Power Management enabled. + bool PowerManaged() const override { + return ReadMsr(k_IA32_PM_ENABLE) & 0x1; + } + + // Returns frequency, thermal, and voltage condition. + std::string FVT() override { + constexpr double kVoltageScale = 1.0 / (1 << 13); + const uint64_t v = ReadMsr(k_IA32_THERM_STATUS); + const bool valid = (v >> 31) & 0x1; + const int c = valid ? (v >> 16) & 0x7f : 0; + const bool current_limit = (v >> 12) & 0x1; + const bool power_limit = (v >> 10) & 0x1; + const bool critical = (v >> 4) & 0x1; + const bool proc_hot = v & 0x1; // AKA "Thermal Status" + const uint64_t p = ReadMsr(k_IA32_PERF_STATUS); + const double voltage = ((p >> 32) & 0xffff) * kVoltageScale; + const int f = GetCurrentFreqMhz(); + std::stringstream s; + s << (critical ? "Critical " : "") + << (proc_hot ? "ProcIsHot " : "") + << (current_limit ? "CurrentLimit " : "") + << (power_limit ? "PowerLimit " : ""); + return Json("f", f) + ", " + Json("voltage", voltage) + ", " + + Json("margin", c) + + (s.str().empty() ? 
"" : ", " + Json("pow_states", s.str())); + } + + std::string InterestingEnables() const override { + const uint64_t v = ReadMsr(k_IA32_MISC_ENABLE); + const bool fast_strings = v & 0x1; + const bool auto_thermal_control = (v >> 3) & 0x1; + const bool pm = PowerManaged(); + std::stringstream s; + s << (fast_strings ? "FastStrings " : "") + << (auto_thermal_control ? "AutoThermalControl " : "") + << (pm ? "PowerManagement" : ""); + return s.str(); + } + + // TODO: separate this from FVT controller. + void ControlFastStringOps(bool enable) override { + uint64_t v = ReadMsr(k_IA32_MISC_ENABLE); + v = (v & ~0x1) | (enable & 0x1); + WriteMsr(k_IA32_MISC_ENABLE, v); + } + + private: + static constexpr int k_SEND_COMMAND = 0x150; + static constexpr int k_IA32_PERF_STATUS = 0x198; + static constexpr int k_IA32_PERF_CTL = 0x199; + static constexpr int k_IA32_THERM_STATUS = 0x19c; + static constexpr int k_IA32_MISC_ENABLE = 0x1a0; + static constexpr int k_MSR_TURBO_RATIO_LIMIT = 0x1ad; + static constexpr int k_IA32_PM_ENABLE = 0x770; + + // Returns the current CPU frequency limit. This is not virtual so that we + // can call this from constructor and destructor safely. + int GetCurrentFreqLimitMhzImpl() { + uint64_t v = ReadMsr(k_IA32_PERF_CTL); + return ((v >> 8) & 0xff) * 100; + } + + int GetCurrentFreqLimitMhz() override { + return GetCurrentFreqLimitMhzImpl(); + } + + int GetCurrentFreqMhz() override { + uint64_t v = ReadMsr(k_IA32_PERF_STATUS); + return ((v >> 8) & 0xff) * 100; + } + + // This sets the current maximum CPU frequency. This is not virtual so that we + // can call this from constructor and destructor safely. 
+ void SetCurrentMaxFreqMhzImpl(int mhz) { + if (mhz == max_mHz_) return; + if (PowerManaged()) { + LOG_EVERY_N_SECS(ERROR, 10) << "Cpu: " << cpu_ + << "Cannot set turbo freq while Power Management enabled!"; + } + ResetFrequencyMeter(); + int hundreds = mhz / 100; + assert(100 * hundreds == mhz); + assert(mhz >= kMinTurboMHz); + assert(mhz <= limit_mhz_); + uint64_t v = hundreds << 8; + WriteMsr(k_IA32_PERF_CTL, v); + max_mHz_ = mhz; + LOG_EVERY_N_SECS(INFO, 15) << "Set cpu: " << cpu_ + << " max turbo freq to: " << mhz << " MHz"; + } + + // Returns the absolute maximum frequency. + int GetAbsoluteFreqLimitMhzImpl() { + uint64_t v = ReadMsr(k_MSR_TURBO_RATIO_LIMIT); + return (v & 0xff) * 100; + } +}; + +} // namespace + +std::unique_ptr NewIntelFVTController(int cpu) { + return std::unique_ptr (new IntelFVTController(cpu)); +}