// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#undef NDEBUG
#include "config.h"
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#if defined(__x86_64__) || defined(__i386__)
#include <immintrin.h>
#endif
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#ifdef IN_GOOGLE3
#include "third_party/zlib/zlib.h"
#else
#include <zlib.h>
#endif
#include <algorithm>
#include <atomic>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <ctime>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <list>
#include <memory>
#include <mutex>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#ifdef IN_GOOGLE3
#include "third_party/openssl/crypto.h"
#include "third_party/openssl/evp.h"
#else
#include <openssl/crypto.h>
#include <openssl/evp.h>
#endif
#ifdef IN_GOOGLE3
#include "third_party/absl/debugging/failure_signal_handler.h"
#include "third_party/absl/debugging/symbolize.h"
#endif
#include "crc32c.h"
#include "third_party/farmhash/src/farmhash.h"
#include "fvt_controller.h"
#include "log.h"
#include "utils.h"
#if defined(__i386__) || defined(__x86_64__)
#define X86_TARGET_ATTRIBUTE(s) __attribute__ ((target (s)))
#else
#define X86_TARGET_ATTRIBUTE(s)
#endif
#undef HAS_FEATURE_MEMORY_SANITIZER
#if defined(__has_feature)
# if __has_feature(memory_sanitizer)
#define HAS_FEATURE_MEMORY_SANITIZER
# endif
#endif
// Helper to make MSAN happy. NOP if memory sanitizer is not enabled.
void InitializeMemoryForSanitizer(char* addr, size_t size) {
#ifdef HAS_FEATURE_MEMORY_SANITIZER
std::default_random_engine rnd;
std::uniform_int_distribution<int> dist(std::numeric_limits<char>::min(),
std::numeric_limits<char>::max());
for (size_t i = 0; i < size; i++) {
addr[i] = dist(rnd);
}
#endif
}
inline void __movsb(char *dst, const char *src, size_t size) {
#if defined(__i386__) || defined(__x86_64__)
__asm__ __volatile__("rep movsb"
: "+D"(dst), "+S"(src), "+c"(size)
:
: "memory");
#else
LOG(FATAL) << "Cannot rep;movsb";
#endif
}
inline void __stosb(void *dst, unsigned char c, size_t size) {
#if defined(__i386__) || defined(__x86_64__)
__asm__ __volatile__("rep stosb"
: "+D"(dst), "+c"(size)
: "a"(c)
: "memory");
#else
LOG(FATAL) << "Cannot rep;stosb";
#endif
}
inline void __sse_128_memcpy(char *dst, const char *src, size_t size) {
#if (defined(__i386__) || defined(__x86_64__))
size_t blks = size / 16;
for (size_t i = 0; i < blks; i++) {
_mm_storeu_si128(
reinterpret_cast<__m128i *>(dst) + i,
_mm_loadu_si128(reinterpret_cast<const __m128i *>(src) + i));
}
memcpy(dst + blks * 16, src + blks * 16, size - blks * 16);
#else
LOG(FATAL) << "SSE not available";
#endif
}
X86_TARGET_ATTRIBUTE("avx")
inline void __avx_256_memcpy(char *dst, const char *src, size_t size) {
#if (defined(__i386__) || defined(__x86_64__))
size_t blks = size / 32;
for (size_t i = 0; i < blks; i++) {
_mm256_storeu_si256(
reinterpret_cast<__m256i *>(dst) + i,
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(src) + i));
}
memcpy(dst + blks * 32, src + blks * 32, size - blks * 32);
#else
LOG(FATAL) << "x86 only";
#endif
}
X86_TARGET_ATTRIBUTE("avx512f")
inline void __avx_512_memcpy(char *dst, const char *src, size_t size) {
#if (defined(__i386__) || defined(__x86_64__))
size_t blks = size / 64;
for (size_t i = 0; i < blks; i++) {
_mm512_storeu_si512(
reinterpret_cast<__m512i *>(dst) + i,
_mm512_loadu_si512(reinterpret_cast<const __m512i *>(src) + i));
}
memcpy(dst + blks * 64, src + blks * 64, size - blks * 64);
#else
LOG(FATAL) << "x86 only";
#endif
}
static const double t0 = TimeInSeconds();
static void SleepForMillis(int64_t millis) {
// LOG(INFO) << "sleeping " << millis;
int64_t micros = 1000 * millis;
while (micros > 0) {
int mm = std::min<int>(1000000, micros);
int rc = usleep(mm);
if (rc) {
LOG(ERROR) << "cant sleep";
}
micros -= mm;
}
}
static long pagesize = sysconf(_SC_PAGESIZE);
static long cache_line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
std::string HexData(const char *s, uint32_t l) {
const char d[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
std::string o;
o.resize(l << 1);
for (uint32_t i = 0; i < l; i++) {
uint8_t b = s[i];
o[(i << 1)] = d[(b >> 4) & 0xf];
o[(i << 1) + 1] = d[b & 0xf];
}
return o;
}
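// Example: HexData("\x0f\xa0", 2) returns "0fa0"; each input byte becomes
// two lowercase hex digits, high nibble first.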
std::string HexStr(const std::string &s) { return HexData(s.data(), s.size()); }
std::atomic_bool exiting(false);
std::atomic_uintmax_t errorCount(0);
std::atomic_uintmax_t successCount(0);
static constexpr uintmax_t kErrorLimit = 2000;
#if defined(__i386__) || defined(__x86_64__)
const bool is_x86 = true;
#else
const bool is_x86 = false;
#endif
// So-called Logistic Map with parameter 4.0.
// Floating point approximation aside, if 0 < v < 1 then 0 < ChaoticF1(v) < 1.
static inline double ChaoticF1(double v) {
return 4.0 * v * (1.0 - v);
}
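// Worked example: starting from v = 0.3,
//   ChaoticF1(0.3)  = 4 * 0.3  * 0.7  = 0.84
//   ChaoticF1(0.84) = 4 * 0.84 * 0.16 = 0.5376
// Iterates of the map stay strictly inside (0, 1); FillBufferSystematic and
// the Avx::* burn loops below use it as a compact, repeatable FP workload.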
#if defined(__i386__) || defined(__x86_64__)
static bool can_do_avx() {
__builtin_cpu_init();
return __builtin_cpu_supports("avx");
}
static bool can_do_avx512f() {
__builtin_cpu_init();
return __builtin_cpu_supports("avx512f");
}
static bool can_do_fma() {
__builtin_cpu_init();
return __builtin_cpu_supports("fma");
}
static bool can_do_fvt() {
return geteuid() == 0; // need write access to MSRs.
}
#else
static bool can_do_avx() { return false; }
static bool can_do_avx512f() { return false; }
static bool can_do_fma() { return false; }
static bool can_do_fvt() { return false; } // x86-only for now.
#endif
bool do_madvise = true;
bool do_repmovsb = is_x86;
bool do_sse_128_memcpy = is_x86;
bool do_avx_256_memcpy = can_do_avx();
bool do_avx_512_memcpy = can_do_avx512f();
bool do_avx_heavy = can_do_avx();
bool do_compress = true;
bool do_encrypt = true;
bool do_hashes = true;
bool do_misalign = true;
bool do_hop = true;
bool do_ssl_self_check = true;
bool do_flush = false; // Default: disabled for now
bool do_provenance = false;
bool do_repstosb = is_x86;
bool do_freq_sweep = false;
bool do_freq_hi_lo = false;
bool do_noise = false;
bool do_invert_cores = false;
int fixed_min_frequency = 0;
int fixed_max_frequency = 0;
bool do_fvt = can_do_fvt();
bool do_fast_string_ops = true;
int seconds_per_freq = 300;
uintmax_t error_limit = kErrorLimit;
bool SetAffinity(int id) {
int err = 0;
#ifdef __linux__
cpu_set_t cset;
CPU_ZERO(&cset);
CPU_SET(id, &cset);
err = sched_setaffinity(0, sizeof(cset), &cset);
std::atomic_thread_fence(std::memory_order_seq_cst);
if (err) {
err = errno;
}
#elif defined(__NetBSD__)
cpuset_t *cset;
cset = cpuset_create();
if (cset == nullptr) {
LOG(ERROR) << "cpuset_create failed: " << strerror(errno);
return false;
}
cpuset_set(id, cset);
err = pthread_setaffinity_np(pthread_self(), cpuset_size(cset), cset);
std::atomic_thread_fence(std::memory_order_seq_cst);
cpuset_destroy(cset);
#endif
if (err != 0) {
LOG_EVERY_N_SECS(WARN, 30)
<< "setaffinity to tid: " << id << " failed: " << strerror(err);
}
return err == 0;
}
std::vector<std::string> ReadDict() {
// Dictionary search paths
static const char *dicts[] = {
"/usr/share/dict/words",
"words",
};
std::vector<std::string> words;
std::ifstream f;
for (const auto &d : dicts) {
f.open(d, std::ifstream::in);
if (f.is_open()) break;
f.clear();
}
if (!f.is_open()) return words;
LOG(DEBUG) << "Reading words.";
std::string word;
while (std::getline(f, word)) {
words.push_back(word);
}
f.close();
LOG(DEBUG) << "Read " << words.size() << " words.";
std::sort(words.begin(), words.end(),
[](const std::string &a, const std::string &b) {
return a.size() < b.size();
});
return words;
}
class MalignBuffer {
public:
struct PunchedHole {
std::string ToString() const;
size_t start = 0;
size_t length = 0;
unsigned char v = 0x53;
};
enum CopyMethod {
kMemcpy,
kRepMov,
kSseBy128,
kAvxBy256,
kAvxBy512,
};
static std::string ToString(CopyMethod m) {
switch (m) {
case kMemcpy:
return "memcpy";
case kRepMov:
return "rep;mov";
case kSseBy128:
return "sse:128";
case kAvxBy256:
return "avx:256";
case kAvxBy512:
return "avx:512";
}
return "unknown";  // Not reachable for valid enum values.
}
// Provides buffer with specified alignment.
MalignBuffer(size_t capacity)
: capacity_(capacity),
base_address_(aligned_alloc(pagesize, capacity + pagesize)) {
assert(base_address_);
// Lots of places use an uninitialized MalignBuffer, so just fill in
// some pseudo-random bytes if cpu_check is compiled with msan.
InitializeMemoryForSanitizer(static_cast<char*>(base_address_), capacity_);
}
~MalignBuffer() { free(base_address_); }
// REQUIRES: alignment_offset + length <= capacity_.
void Initialize(size_t alignment_offset, size_t length);
const char* data() const { return buffer_address_; }
char* data() { return buffer_address_; }
size_t size() const { return length_; }
// REQUIRES alignment_offset + length <= capacity_.
void resize(size_t length);
// Compares 'this' to 'that' returning empty string if identical.
// If not identical, returns a syndrome, currently Hamming distance,
// corrupted subrange bounds, and the diffs.
std::string Syndrome(const MalignBuffer& that) const;
// Validated data copy from source to 'this'.
// 'this' must be appropriately sized.
// Returns syndrome upon copy failure.
std::string CopyFrom(const MalignBuffer& that, CopyMethod m);
// Unvalidated copy to 'this'.
void CopyFrom(const char* src, size_t length, CopyMethod m);
void CopyFrom(size_t pos, const char* src, size_t length, CopyMethod m);
// Randomly flushes cache lines.
void RandomFlush(std::knuth_b* rng) const;
// Conventional or rep;sto memset operation, according to 'use_rep_stos'.
void Memset(size_t offset, unsigned char v, size_t length, bool use_rep_stos);
// Memsets buffer to 'hole.v', using the rep;stos operation if
// 'use_rep_stos' is set.
void PunchHole(const PunchedHole& hole, bool use_rep_stos);
private:
std::string CorruptionSyndrome(const MalignBuffer& that) const;
std::string CrackId(uint64_t) const;
const size_t capacity_;
void* base_address_ = nullptr;
size_t alignment_offset_ = 0;
size_t length_ = 0; // Usable length
char* buffer_address_ = nullptr;
};
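// Usage sketch (illustrative only, not called anywhere): view a page-aligned
// allocation through a deliberately misaligned window.
//
//   MalignBuffer buf(1 << 20);  // one page of slack is added internally
//   buf.Initialize(/*alignment_offset=*/3, /*length=*/4096);
//   buf.Memset(0, 0xa5, buf.size(), /*use_rep_stos=*/false);
//
// buf.data() now points 3 bytes past a page boundary, which is exactly the
// kind of misalignment the copy and compare paths below are meant to stress.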
void MalignBuffer::Initialize(size_t alignment_offset, size_t length) {
assert(alignment_offset + length <= capacity_);
alignment_offset_ = alignment_offset;
length_ = length;
buffer_address_ = static_cast<char*>(base_address_) + alignment_offset_;
}
void MalignBuffer::resize(size_t length) {
Initialize(alignment_offset_, length);
}
std::string MalignBuffer::CopyFrom(const MalignBuffer& that, CopyMethod m) {
CopyFrom(that.data(), that.size(), m);
return Syndrome(that);
}
void MalignBuffer::CopyFrom(const char* src, size_t length, CopyMethod m) {
assert(size() == length);
CopyFrom(0, src, length, m);
}
void MalignBuffer::CopyFrom(size_t pos, const char* src, size_t length,
CopyMethod m) {
assert(pos + length <= size());
switch (m) {
case kMemcpy:
// Assumes memcpy doesn't use rep;movsb; that assumption is false in
// lots of environments.
memcpy(data() + pos, src, length);
break;
case kRepMov:
__movsb(data() + pos, src, length);
break;
case kSseBy128:
__sse_128_memcpy(data() + pos, src, length);
break;
case kAvxBy256:
__avx_256_memcpy(data() + pos, src, length);
break;
case kAvxBy512:
__avx_512_memcpy(data() + pos, src, length);
break;
}
}
std::string MalignBuffer::Syndrome(const MalignBuffer& that) const {
std::stringstream s;
std::string syndrome = CorruptionSyndrome(that);
if (syndrome.empty()) return "";
s << syndrome << ", \"this\": \"" << static_cast<const void *>(data())
<< "\", "
<< "\"that\": \"" << static_cast<const void *>(that.data()) << "\"";
return s.str();
}
std::string MalignBuffer::CorruptionSyndrome(const MalignBuffer& that) const {
std::stringstream s;
if (size() != that.size()) {
s << Json("unequalSizeThis", size()) << ", "
<< Json("unequalSizeThat", that.size());
return s.str();
}
bool failed_memcmp = memcmp(data(), that.data(), that.size());
int wrong_bytes = 0;
int wrong_bits = 0;
int byte_faults = 0;
int first_wrong = INT_MAX;
int last_wrong = INT_MIN;
std::vector<int> lane_errors(8, 0);
for (size_t i = 0; i < size(); i++) {
unsigned char a = *(data() + i);
unsigned char b = *(that.data() + i);
unsigned char d = a ^ b;
if (d) {
first_wrong = std::min<int>(first_wrong, i);
last_wrong = std::max<int>(last_wrong, i);
byte_faults |= d;
wrong_bytes++;
wrong_bits += __builtin_popcount(d);
for (size_t j = 0; j < 8; j++) {
if ((d >> j) & 1) {
lane_errors[j]++;
}
}
}
}
if (wrong_bits || wrong_bytes) {
const int range_width = (last_wrong - first_wrong) + 1;
s << Json("cmpResult",
(failed_memcmp ? "Failed_Memcmp" : "**Passed_Memcmp**"))
<< ", " << Json("wrongByteCount", wrong_bytes) << ", "
<< Json("wrongBitCount", wrong_bits) << ", "
<< Json("corruptionWidth", range_width) << ", "
<< Json("corruptStart", first_wrong) << ", "
<< Json("corruptByteBitMask", byte_faults) << ", "
<< "\"byBitLane\": [";
for (size_t i = 0; i < 8; i++) {
if (i) s << ", ";
s << lane_errors[i];
}
s << " ] ";
// Dump up to 64 corrupted locations.
std::stringstream dump;
dump << " \"byteErrors\": [ " << std::hex;
uint64_t buf_a = 0;
uint64_t buf_b = 0;
for (int k = 0; k < std::min(64, range_width); k++) {
uint8_t a = *(data() + first_wrong + k);
uint8_t b = *(that.data() + first_wrong + k);
if (k) dump << ", ";
dump << "[ " << std::setw(2) << "\"0x" << static_cast<int>(a) << "\", "
<< std::setw(2) << "\"0x" << static_cast<int>(b) << "\" ";
buf_a = (buf_a >> 8) | static_cast<uint64_t>(a) << 56;
buf_b = (buf_b >> 8) | static_cast<uint64_t>(b) << 56;
if ((k >= 7) && (7 == ((first_wrong + k) % 8))) {
dump << ", " << CrackId(buf_a) << ", " << CrackId(buf_b);
buf_a = 0;
buf_b = 0;
}
dump << " ]";
}
dump << " ] ";
return s.str() + ", " + dump.str();
} else {
if (!failed_memcmp) return "";
return Json("cmpResult", "**Failed_Memcmp**");
}
}
std::string MalignBuffer::CrackId(uint64_t v) const {
std::stringstream s;
s << std::hex << std::setfill('0') << " [\"0x" << std::setw(4) << (v >> 48)
<< "\", \"0x" << std::setw(6) << ((v >> 24) & 0xffffff) << "\", \"0x"
<< std::setw(6) << (v & 0xffffff) << "\"]";
return s.str();
}
void MalignBuffer::RandomFlush(std::knuth_b* rng) const {
#if defined(__i386__) || defined(__x86_64__)
// Note: no barriers used.
const char* p = buffer_address_ + alignment_offset_;
while (p < buffer_address_ + length_) {
if (std::uniform_int_distribution<int>(0, 1)(*rng)) {
__builtin_ia32_clflush(p);
}
p += cache_line_size;
}
#endif
}
std::string MalignBuffer::PunchedHole::ToString() const {
if (length) {
return JsonRecord("hole",
Json("start", start) + ", " + Json("length", length)
+ ", " + Json("v", static_cast<int>(v)));
} else {
return JsonNull("hole");
}
}
void MalignBuffer::Memset(size_t offset, unsigned char v, size_t length,
bool use_rep_stos) {
if (use_rep_stos) {
__stosb(data() + offset, v, length);
} else {
memset(data() + offset, v, length);
}
}
void MalignBuffer::PunchHole(const PunchedHole& hole, bool use_rep_stos) {
if (hole.length) {
Memset(hole.start, hole.v, hole.length, use_rep_stos);
}
}
// x86 AVX usage has complicated core power effects. This code tries
// to provoke some power transitions that don't otherwise happen.
// While it's at it, it lightly checks results, but that's not the central
// goal. TODO: maybe toughen the correctness checking.
//
// The power policies are governed by a number of opaque parameters; this code
// is based on a lot of guesses.
class Avx {
public:
Avx() {}
// Activate AVX depending on throw of the dice.
// Returns syndrome if computational error detected.
std::string MaybeGoHot();
// Does a bit of computing if in a "hot" mode.
// Returns syndrome if computational error detected.
std::string BurnIfAvxHeavy();
private:
constexpr static int kIterations = 5000;
std::string Avx256(int rounds);
std::string Avx256FMA(int rounds);
std::string Avx512(int rounds);
int level_ = 0;
std::knuth_b rng_;
};
std::string Avx::MaybeGoHot() {
if (std::uniform_int_distribution<int>(0, 1)(rng_)) {
// Don't provoke.
level_ = 0;
return "";
}
if (can_do_avx512f()) {
// Processor supports both AVX and AVX512.
level_ = std::uniform_int_distribution<int>(0, 1)(rng_) ? 3 : 5;
} else {
// Processor supports only AVX.
level_ = 3;
}
return BurnIfAvxHeavy();
}
std::string Avx::BurnIfAvxHeavy() {
if (level_ == 3) {
return can_do_fma() ? Avx256FMA(kIterations) : Avx256(kIterations);
}
if (level_ == 5) {
return Avx512(kIterations);
}
return "";
}
// See notes for Avx512 below
X86_TARGET_ATTRIBUTE("avx")
std::string Avx::Avx256(int rounds) {
#if (defined(__i386__) || defined(__x86_64__))
const __m256d minus_four = _mm256_set1_pd(-4.0);
__m256d x[4];
for (int k = 0; k < 4; k++) {
x[k] =
_mm256_set1_pd(std::uniform_real_distribution<double>(0.0, 1.0)(rng_));
}
double *gross_x[4] = {
reinterpret_cast<double *>(&x[0]),
reinterpret_cast<double *>(&x[1]),
reinterpret_cast<double *>(&x[2]),
reinterpret_cast<double *>(&x[3]),
};
for (int i = 0; i < rounds; i++) {
__m256d a[4];
a[0] = _mm256_sub_pd(_mm256_mul_pd(x[0], x[0]), x[0]);
a[1] = _mm256_sub_pd(_mm256_mul_pd(x[1], x[1]), x[1]);
a[2] = _mm256_sub_pd(_mm256_mul_pd(x[2], x[2]), x[2]);
a[3] = _mm256_sub_pd(_mm256_mul_pd(x[3], x[3]), x[3]);
x[0] = _mm256_mul_pd(minus_four, a[0]);
x[1] = _mm256_mul_pd(minus_four, a[1]);
x[2] = _mm256_mul_pd(minus_four, a[2]);
x[3] = _mm256_mul_pd(minus_four, a[3]);
}
for (int k = 1; k < 4; k++) {
for (int i = 0; i < 4; i++) {
if (gross_x[k][i] != gross_x[k][0]) {
return "avx256 pd";
}
}
}
#endif
return "";
}
// See notes for Avx512 below
X86_TARGET_ATTRIBUTE("avx,fma")
std::string Avx::Avx256FMA(int rounds) {
#if (defined(__i386__) || defined(__x86_64__))
const __m256d minus_four = _mm256_set1_pd(-4.0);
__m256d x[4];
for (int k = 0; k < 4; k++) {
x[k] =
_mm256_set1_pd(std::uniform_real_distribution<double>(0.0, 1.0)(rng_));
}
double *gross_x[4] = {
reinterpret_cast<double *>(&x[0]),
reinterpret_cast<double *>(&x[1]),
reinterpret_cast<double *>(&x[2]),
reinterpret_cast<double *>(&x[3]),
};
for (int i = 0; i < rounds; i++) {
__m256d a[4];
a[0] = _mm256_fmsub_pd(x[0], x[0], x[0]);
a[1] = _mm256_fmsub_pd(x[1], x[1], x[1]);
a[2] = _mm256_fmsub_pd(x[2], x[2], x[2]);
a[3] = _mm256_fmsub_pd(x[3], x[3], x[3]);
x[0] = _mm256_mul_pd(minus_four, a[0]);
x[1] = _mm256_mul_pd(minus_four, a[1]);
x[2] = _mm256_mul_pd(minus_four, a[2]);
x[3] = _mm256_mul_pd(minus_four, a[3]);
}
for (int k = 1; k < 4; k++) {
for (int i = 0; i < 4; i++) {
if (gross_x[k][i] != gross_x[k][0]) {
return "avx256 pd";
}
}
}
#endif
return "";
}
// Interleave AVX512 parallel calculation of iterates of f(x) = 4x(1-x).
// Hope compiler too dumb to see through this.
X86_TARGET_ATTRIBUTE("avx512f")
std::string Avx::Avx512(int rounds) {
#if (defined(__i386__) || defined(__x86_64__))
const __m512d minus_four = _mm512_set1_pd(-4.0);
__m512d x[4];
for (int k = 0; k < 4; k++) {
x[k] =
_mm512_set1_pd(std::uniform_real_distribution<double>(0.0, 1.0)(rng_));
}
double *gross_x[4] = {
reinterpret_cast<double *>(&x[0]),
reinterpret_cast<double *>(&x[1]),
reinterpret_cast<double *>(&x[2]),
reinterpret_cast<double *>(&x[3]),
};
for (int i = 0; i < rounds; i++) {
__m512d a[4];
a[0] = _mm512_fmsub_pd(x[0], x[0], x[0]);
a[1] = _mm512_fmsub_pd(x[1], x[1], x[1]);
a[2] = _mm512_fmsub_pd(x[2], x[2], x[2]);
a[3] = _mm512_fmsub_pd(x[3], x[3], x[3]);
x[0] = _mm512_mul_pd(minus_four, a[0]);
x[1] = _mm512_mul_pd(minus_four, a[1]);
x[2] = _mm512_mul_pd(minus_four, a[2]);
x[3] = _mm512_mul_pd(minus_four, a[3]);
}
for (int k = 1; k < 4; k++) {
for (int i = 0; i < 8; i++) {  // An __m512d holds 8 doubles; check all lanes.
if (gross_x[k][i] != gross_x[k][0]) {
return "avx512 pd";
}
}
}
#endif
return "";
}
// Produces noise of all kinds by running intermittently.
// There's a coarse cycle with four fine phases:
// Phase 0: Off
// Phase 1: High-speed on-off
// Phase 2: On
// Phase 3: High-speed on-off
class NoiseScheduler {
public:
// The following constants are just plain made up outta whole cloth.
// You could consider various power regulation time constants and thermal
// inertia and so forth. Or just make it up.
static constexpr int kCoarseMillis = 5000; // Coarse period in millis
static constexpr int kFineMillis = 50; // Fine period in millis
// Blocks until next scheduled activity
static void BlockUntilOn() {
bool was_blocked = false;
while (true) {
int64_t t = 1e3 * TimeInSeconds();
int64_t coarse_block = t / kCoarseMillis;
int64_t phase = coarse_block % 4;
if (phase == 2) {
if (was_blocked) {
// LOG(INFO) << "Coarse grained unblock";
}
was_blocked = false;
return; // On
}
if (phase == 0) {
// Wait til next phase and then re-evaluate.
SleepForMillis(((coarse_block + 1) * kCoarseMillis) - t);
was_blocked = true;
continue;
}
// Fine phase.
int64_t fine_block = t / kFineMillis;
if (fine_block % 2) {
if (was_blocked) {
// LOG(INFO) << "Fine grained unblock";
}
was_blocked = false;
return; // Fine-grained on
}
// Wait til next fine block and then re-evaluate.
SleepForMillis(((fine_block + 1) * kFineMillis) - t);
was_blocked = true;
}
}
};
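// Timeline sketch with the constants above (kCoarseMillis = 5000,
// kFineMillis = 50); one full coarse cycle spans 20 seconds:
//   [ 0 s,  5 s)  phase 0: blocked (off)
//   [ 5 s, 10 s)  phase 1: on during alternating 50 ms fine blocks
//   [10 s, 15 s)  phase 2: unblocked (on)
//   [15 s, 20 s)  phase 3: on during alternating 50 ms fine blocks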
// Rudimentary coherence/uncore tester.
// Randomly assigns each slot of a seemingly shared buffer to a single tid,
// creating only "false sharing".
// Thus each slot, regardless of alignment, must obey program order unless the
// machine is broken.
// To be toughened, e.g.:
// Widen the slots a bit
// Control the sharing more tightly, e.g. each cache line split between 2 tids
// Maybe checksum the indices to distinguish core-local compute errors from
// coherence errors, but that's perhaps easier said than done effectively.
// As it stands, it may be particularly hard to localize failures. Though that's
// always going to be a bit hard, which is the point. One technique might be
// to leave this alone and to run on subsets of cores and sockets.
class Silkscreen {
public:
static constexpr size_t kSize = 1000 * 1000; // Size of buffer
Silkscreen(const std::vector<int> &tid_list);
~Silkscreen() { free(buffer_address_); }
// Writes value derived from 'round' into all slots owned by 'tid'.
// Returns number of slots written.
uint64_t WriteMySlots(int tid, uint64_t round);
// Returns a JSON-formatted error string if slot 'k' belongs to 'tid' and
// holds a value that does not properly correspond to 'round'.
// Returns "=" if slot 'k' belongs to 'tid' and has expected value.
// Otherwise, if slot 'k' does not belong to 'tid', returns empty string.
std::string CheckMySlot(int tid, uint64_t round, size_t k) const;
private:
int owner(size_t k) const { return owner_[k]; }
size_t size() const { return owner_.size(); }
const char* data(size_t k) const { return buffer_address_ + k; }
char* data(size_t k) { return buffer_address_ + k; }
std::vector<uint16_t> owner_; // const after initialization
char* buffer_address_ = nullptr;
};
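// Ownership sketch (illustrative): with tid_list {0, 1}, the first few slots
// might be assigned
//   slot:  0  1  2  3  4  5 ...
//   owner: 1  0  0  1  0  1 ...
// Each byte-wide slot is written only by its owner, so neighboring slots with
// different owners share cache lines ("false sharing") without any true data
// race. A slot that disagrees with its owner's last write therefore implies a
// lost store or broken coherence rather than an application-level race.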
Silkscreen::Silkscreen(const std::vector<int> &tid_list)
: buffer_address_(static_cast<char*>(aligned_alloc(
pagesize,
pagesize * ((kSize + pagesize - 1) / pagesize) ))) {
std::knuth_b rng;
std::uniform_int_distribution<size_t> dist(0, tid_list.size() - 1);
for (size_t k = 0; k < kSize; k++) {
size_t w = dist(rng);
owner_.push_back(tid_list[w]);
}
}
uint64_t Silkscreen::WriteMySlots(int tid, uint64_t round) {
uint64_t j = 0;
for (size_t k = 0; k < kSize; k++) {
if (owner(k) == tid) {
*data(k) = static_cast<char>(round);
j++;
}
}
return j;
}
std::string Silkscreen::CheckMySlot(int tid, uint64_t round, size_t k) const {
if (owner(k) != tid) return "";
const int v = *data(k);
const int w = static_cast<char>(round);
if (v == w) return "=";
return
Json("position", k) + ", " + Json("is", v) + ", " + Json("expected", w);
}
class Worker {
public:
// Does not take ownership of 'silkscreen'.
Worker(int pid, const std::vector<std::string> *words,
std::vector<int> tid_list, int tid, Silkscreen *silkscreen)
: pid_(pid), tid_(tid), words_(words), tid_list_(tid_list),
silkscreen_(silkscreen), rndeng_(std::random_device()()) {
}
~Worker() {}
void Run();
private:
static constexpr size_t kBufMin = 12;
#ifdef HAS_FEATURE_MEMORY_SANITIZER
// Use smaller buffers if cpu_check is built with msan. Otherwise
// we will time out in testing.
static constexpr size_t kBufMax = 1 << 16; // 64 KiB
#else
static constexpr size_t kBufMax = 1 << 20; // 1 MiB
#endif
struct FloatingPointResults {
bool operator!=(const FloatingPointResults& other) const {
return d != other.d;
}
double d = 0.0;
};
typedef struct {
const char *name;
FloatingPointResults (Worker::*func)(
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer*) const;
} generatorItem;
struct BufferSet {
void Alloc(std::unique_ptr<MalignBuffer> *p) {
const size_t kBufCap = kBufMax + 23 * pagesize;
if (!*p) {
p->reset(new MalignBuffer(kBufCap));
}
}
std::unique_ptr<MalignBuffer> original;
std::unique_ptr<MalignBuffer> compressed;
std::unique_ptr<MalignBuffer> encrypted;
std::unique_ptr<MalignBuffer> copied;
std::unique_ptr<MalignBuffer> decrypted;
std::unique_ptr<MalignBuffer> decompressed;
std::unique_ptr<MalignBuffer> re_made;
};
FloatingPointResults FillBufferSystematic(
uint32_t unused_seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const;
FloatingPointResults FillBufferRandomData(
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const;
FloatingPointResults FillBufferRandomText(
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const;
FloatingPointResults FillBufferGrilledCheese(
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const;
void MadviseDontNeed(const MalignBuffer &s) const;
size_t Alignment();
void MaybeFlush(const MalignBuffer &s);
// Attempts to schedule CPU frequency using the Worker's FVTController
// object. Returns the scheduled frequency, or 0 if no FVTController
// is available.
int ScheduledMHz() const;
MalignBuffer::CopyMethod CopyMethod();
std::string FVT() const;
MalignBuffer::PunchedHole PunchedHole(size_t bufsize);
FloatingPointResults GenerateData(
const generatorItem& generator,
const MalignBuffer::PunchedHole& hole,
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const;
// Emits a failure record.
// TODO: bump error count here, and maybe log, instead of at every
// call site.
std::string Jfail(const std::string &err, const std::string &v) {
if (errorCount > error_limit) {
exiting = true;
LOG(INFO) << "I am quitting after " << errorCount << " errors";
}
return "{ " + JsonRecord("fail", Json("err", err) + ", " + v) + ", " +
JTag() + " }";
}
// Array of random data generators.
static const std::vector<generatorItem> kGenerators;
const uint64_t pid_;
const int tid_;
const std::vector<std::string> *words_;
const std::vector<int> tid_list_;
Silkscreen* const silkscreen_;
// We don't really need "good" random numbers.
// std::mt19937_64 rndeng_;
std::knuth_b rndeng_;
uint64_t round_ = 0;
std::unique_ptr<FVTController> fvt_controller_;
};
const std::vector<Worker::generatorItem> Worker::kGenerators = {
{"SYSTEMATIC", &Worker::FillBufferSystematic},
{"DATA", &Worker::FillBufferRandomData},
{"TEXT", &Worker::FillBufferRandomText},
{"CHEESE", &Worker::FillBufferGrilledCheese},
};
std::string Worker::FVT() const {
if (fvt_controller_ == nullptr) return "";
return fvt_controller_->FVT();
}
int Worker::ScheduledMHz() const {
if (fvt_controller_ == nullptr) {
return 0;
}
if (fixed_min_frequency && (fixed_min_frequency == fixed_max_frequency)) {
// User-specified fixed frequency.
return fixed_min_frequency;
}
if (!do_freq_sweep && !do_freq_hi_lo
&& !fixed_min_frequency && !fixed_max_frequency) {
// Run at maximum frequency.
return fvt_controller_->limit_mHz();
}
const int low_f =
fixed_min_frequency ? fixed_min_frequency : fvt_controller_->kMinTurboMHz;
// hi_f cannot exceed limit
const int limit_mHz = fvt_controller_->limit_mHz();
const int hi_f = fixed_max_frequency
? std::min<int>(fixed_max_frequency, limit_mHz)
: limit_mHz;
int64_t t = TimeInSeconds() / seconds_per_freq;
if (do_freq_hi_lo) {
const int step = t % 2;
return step ? low_f : hi_f;
} else {
const int steps = 1 + (hi_f - low_f) / 100;
const int full_ramps = t / steps;
const bool upwards = full_ramps % 2;
const int step = t % steps;
return upwards ? low_f + 100 * step : hi_f - 100 * step;
}
}
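// Sweep sketch: with low_f = 1000 and hi_f = 1400, steps = 5, so successive
// values of t (one step per seconds_per_freq) yield
//   t = 0..4: 1400, 1300, 1200, 1100, 1000   (full_ramps even: ramp down)
//   t = 5..9: 1000, 1100, 1200, 1300, 1400   (full_ramps odd: ramp up)
// i.e. a triangle wave between the two limits in 100 MHz steps.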
// Fills 'b' with a systematic pattern.
// Returns iterate of chaotic floating point function of 'seed', with some
// reciprocal torture.
Worker::FloatingPointResults Worker::FillBufferSystematic(
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const {
// Format: 2 bytes of PID, 3 bytes of round number, 3 bytes of offset.
// Note: Perhaps should be AC-modulated. Perhaps should be absolutely
// aligned for easier recognition.
// Note: appropriate for LE machines only.
FloatingPointResults fp;
fp.d = std::max<uint32_t>(seed, 2);
for (size_t i = 0; i * 8 < b->size(); i++) {
const size_t p = 8 * i;
const size_t k = std::min<size_t>(8, b->size() - p);
const uint64_t v =
((pid_ & 0xffff) << 48)
| ((round_ & 0xffffff) << 24) | (i & 0xffffff);
for (size_t m = 0; m < k; m++) {
(b->data())[p + m] = *(reinterpret_cast<const char*>(&v) + m);
}
fp.d = 1.0 / ChaoticF1(1.0 / fp.d);
}
fp.d = 1.0 / fp.d;
return fp;
}
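// Pattern-word sketch: with pid 0x1234, round 5, and offset i = 9, the word is
//   v = 0x1234000005000009
// which a little-endian machine stores as bytes 09 00 00 05 00 00 34 12;
// CrackId() above splits such a word back into its pid/round/offset fields.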
Worker::FloatingPointResults Worker::FillBufferRandomData(
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const {
std::knuth_b rng(seed);
std::uniform_int_distribution<uint64_t>
dist(0, std::numeric_limits<uint64_t>::max());
size_t p = 0;
const size_t length = b->size();
// Repeatedly append a random number (one to eight) of random bytes.
while (p < length) {
const size_t max_span = std::min<size_t>(length - p, 8);
const size_t z = std::uniform_int_distribution<size_t>(1, max_span)(rng);
const uint64_t v = dist(rng);
b->CopyFrom(p, reinterpret_cast<const char*>(&v), z, copy_method);
p += z;
}
return FloatingPointResults();
}
Worker::FloatingPointResults Worker::FillBufferRandomText(
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos, MalignBuffer* b) const {
std::knuth_b rng(seed);
std::exponential_distribution<double> dist(20);
const size_t bufsize = b->size();
size_t pos = 0;
while (pos < bufsize) {
const size_t r = std::min(static_cast<size_t>(dist(rng) * words_->size()),
words_->size() - 1);
const auto &word = (*words_)[r];
const size_t wordlen = word.size();
if (pos + wordlen >= bufsize) {
break;
}
b->CopyFrom(pos, word.c_str(), wordlen, copy_method);
pos += wordlen;
if (pos < bufsize) {
b->Memset(pos, ' ', 1, use_repstos);
pos++;
}
}
// Pad with spaces
b->Memset(pos, ' ', bufsize - pos, use_repstos);
return FloatingPointResults();
}
// memset (conventional or rep;stos) randomly aligned, random width, randomly
// overlapped stretches of buffer. Constants aim to hit multiple times in cache
// lines and buffers. Untuned and based on nothing but hunches.
Worker::FloatingPointResults Worker::FillBufferGrilledCheese(
uint32_t seed, MalignBuffer::CopyMethod copy_method, bool use_repstos,
MalignBuffer* b) const {
std::knuth_b rng(seed);
const size_t kAdvance = 15;
const size_t kWindow = 64;
unsigned char flavor = 0;
b->Memset(0, 0, b->size(), use_repstos);
for (size_t base = kWindow; base < b->size(); base += kAdvance) {
if (std::uniform_int_distribution<int>(0, 1)(rng)) continue;
flavor++;
const size_t start =
std::uniform_int_distribution<size_t>(base - kWindow, base)(rng);
const size_t end = std::uniform_int_distribution<size_t>(start, base)(rng);
b->Memset(start, flavor, 1 + end - start, use_repstos);
}
return FloatingPointResults();
}
Worker::FloatingPointResults Worker::GenerateData(
const generatorItem& generator,
const MalignBuffer::PunchedHole& hole,
uint32_t seed, MalignBuffer::CopyMethod copy_method,
bool use_repstos,
MalignBuffer* b) const {
const FloatingPointResults f =
(this->*generator.func)(seed, copy_method, use_repstos, b);
b->PunchHole(hole, use_repstos);
return f;
}
// Hints to the OS to release the buffer's memory.
void Worker::MadviseDontNeed(const MalignBuffer &s) const {
// Round up the buffer start address to a page boundary.
intptr_t start = ((intptr_t) s.data() + pagesize - 1) & ~(pagesize - 1);
// Round down the buffer end address to a page boundary.
intptr_t end = ((intptr_t) (s.data() + s.size() - 1)) & ~(pagesize - 1);
if (end - start >= pagesize) {
if (madvise((char *)start, end - start, MADV_DONTNEED) == -1) {
LOG(WARN) << "tid " << tid_
<< " madvise(MADV_DONTNEED) failed: " << strerror(errno);
}
}
}
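// Rounding sketch with 4 KiB pages: a buffer occupying [0x1003, 0x4ffd)
// yields start = 0x2000 and end = 0x4000, so only the fully contained pages
// [0x2000, 0x4000) are released; partial pages at either end are left alone.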
void Worker::MaybeFlush(const MalignBuffer& s) {
// Half the time, randomly flush the buffer's cache lines.
if (do_flush && std::uniform_int_distribution<int>(0, 1)(rndeng_)) {
s.RandomFlush(&rndeng_);
}
}
size_t Worker::Alignment() {
return do_misalign ?
std::uniform_int_distribution<size_t>(0, pagesize)(rndeng_) : 0;
}
MalignBuffer::CopyMethod Worker::CopyMethod() {
std::vector<MalignBuffer::CopyMethod> v;
v.push_back(MalignBuffer::kMemcpy);
if (do_repmovsb) {
// Weight rep;mov more heavily.
for (int i = 0; i < 3; i++) {
v.push_back(MalignBuffer::kRepMov);
}
}
if (do_sse_128_memcpy) v.push_back(MalignBuffer::kSseBy128);
if (do_avx_256_memcpy) v.push_back(MalignBuffer::kAvxBy256);
if (do_avx_512_memcpy) v.push_back(MalignBuffer::kAvxBy512);
size_t k = std::uniform_int_distribution<int>(0, v.size() - 1)(rndeng_);
return v[k];
}
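// Selection weights (sketch): with every method enabled on an AVX-512
// machine, 'v' holds {memcpy, rep;mov x3, sse:128, avx:256, avx:512}, seven
// entries in all, so rep;movsb is chosen with probability 3/7 and each other
// method with probability 1/7.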
MalignBuffer::PunchedHole Worker::PunchedHole(size_t bufsize) {
MalignBuffer::PunchedHole hole;
hole.length =
std::uniform_int_distribution<size_t>(
1, std::min<size_t>(bufsize, 8192))(rndeng_);
hole.start =
std::uniform_int_distribution<size_t>(
0, bufsize - hole.length)(rndeng_);
return hole;
}
std::string OpenSSL_Hash(const MalignBuffer &s, const EVP_MD *type) {
EVP_MD_CTX *ctx;
ctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(ctx, type, nullptr);
std::string hash;
hash.resize(EVP_MD_CTX_size(ctx));
InitializeMemoryForSanitizer(&hash[0], EVP_MD_CTX_size(ctx));
EVP_DigestUpdate(ctx, s.data(), s.size());
EVP_DigestFinal_ex(ctx, (uint8_t *)&hash[0], nullptr);
EVP_MD_CTX_destroy(ctx);
return HexStr(hash);
}
void Worker::Run() {
// Array of hash/checksum routines.
typedef struct {
const char *name;
std::string (*func)(const MalignBuffer &);
} hashItem;
std::vector<hashItem> hashers = {
{
"MD5",
[](const MalignBuffer &s) -> std::string {
return OpenSSL_Hash(s, EVP_md5());
},
},
{
"SHA1",
[](const MalignBuffer &s) -> std::string {
return OpenSSL_Hash(s, EVP_sha1());
},
},
{
"SHA256",
[](const MalignBuffer &s) -> std::string {
return OpenSSL_Hash(s, EVP_sha256());
},
},
{
"SHA512",
[](const MalignBuffer &s) -> std::string {
return OpenSSL_Hash(s, EVP_sha512());
},
},
{
"ADLER32", // exported by zlib
[](const MalignBuffer &s) -> std::string {
uLong c = adler32(0, Z_NULL, 0);
c = adler32(c, (const Bytef *)s.data(), s.size());
return HexData((const char *)&c, sizeof(c));
},
},
{
"CRC32", // exported by zlib.
[](const MalignBuffer &s) -> std::string {
uLong c = crc32(0, Z_NULL, 0);
c = crc32(c, (const Bytef *)s.data(), s.size());
return HexData((const char *)&c, sizeof(c));
},
},
{
"CRC32C", // crc32 instruction on SSSE3
[](const MalignBuffer &s) -> std::string {
uint32_t c = crc32c(s.data(), s.size());
return HexData((const char *)&c, sizeof(c));
},
},
{
"FarmHash64", // Google farmhash
[](const MalignBuffer &s) -> std::string {
uint64_t c = util::Hash64(s.data(), s.size());
return HexData((const char *)&c, sizeof(c));
},
},
};
// Array of compression routines.
typedef struct {
const char *name;
int (*enc)(MalignBuffer *, const MalignBuffer &);
int (*dec)(MalignBuffer *, const MalignBuffer &);
} compressorItem;
std::vector<compressorItem> compressors = {
{
"ZLIB",
[](MalignBuffer *o, const MalignBuffer &s) {
uLongf olen = compressBound(s.size());
o->resize(olen);
int err = compress2((Bytef *)o->data(), &olen, (Bytef *)s.data(),
s.size(), Z_BEST_SPEED);
if (err != Z_OK) {
LOG(DEBUG) << "zlib compression failed: " << err
<< " srclen: " << s.size()
<< " destlen: " << o->size();
return err;
}
o->resize(olen);
return 0;
},
[](MalignBuffer *o, const MalignBuffer &s) {
uLongf olen = o->size();
int err = uncompress((Bytef *)o->data(), &olen, (Bytef *)s.data(),
s.size());
if (err != Z_OK) {
LOG(DEBUG) << "zlib decompression failed: " << err
<< " srclen: " << s.size()
<< " destlen: " << o->size();
return err;
}
o->resize(olen);
return 0;
},
},
};
// Choose generator and compressor uniformly.
std::uniform_int_distribution<int> gen_dist(
0, do_provenance ? 0 : kGenerators.size() - 1);
auto Gen = std::bind(gen_dist, rndeng_);
std::uniform_int_distribution<int> comp_dist(0, compressors.size() - 1);
auto Comp = std::bind(comp_dist, rndeng_);
// Run one randomly-chosen hash routine each round.
size_t hash_choice;
std::string hash_value;
EVP_CIPHER_CTX *cipher_ctx;
cipher_ctx = EVP_CIPHER_CTX_new();
// Creates FVT controller if we can do so.
if (do_fvt) {
fvt_controller_ = FVTController::Create(tid_);
fvt_controller_->SetCurrentFreqLimitMhz(fvt_controller_->limit_mHz());
fvt_controller_->ControlFastStringOps(do_fast_string_ops);
LOG(INFO) << "Tid: " << tid_
<< " Enables: " << fvt_controller_->InterestingEnables();
}
// MalignBuffers are allocated once if !do_madvise. Otherwise they are
// reallocated each iteration of the main loop, creating much more memory
// allocator work, which may itself exhibit, and suffer from, CPU defects.
std::unique_ptr<BufferSet> b;
uint64_t expected_slot_count = 0;
Avx avx;
while (!exiting) {
if (std::thread::hardware_concurrency() > 1) {
if (!SetAffinity(tid_)) {
LOG(WARN) << "Couldnt run on " << tid_ << " sleeping a bit";
sleep(30);
continue;
}
}
round_++;
if (!b) {
b.reset(new BufferSet);
}
if (do_noise) {
NoiseScheduler::BlockUntilOn();
}
const int turbo_mhz = ScheduledMHz(); // 0 if no FVT.
if (fvt_controller_ != nullptr) {
fvt_controller_->SetCurrentFreqLimitMhz(turbo_mhz);
fvt_controller_->MonitorFrequency();
}
auto Turbo = [&turbo_mhz](){
return Json("turbo", turbo_mhz);
};
auto Tid = [this](int tid) {
return "{ " + Json("tid", tid) + (do_fvt ? ", " + FVT() : "") + " }";
};
auto Writer = [this, Tid](){
return "\"writer\": " + Tid(tid_);
};
LOG_EVERY_N_SECS(INFO, 30) << Jstat(
Json("elapsed_s", static_cast<uint64_t>(TimeInSeconds() - t0)) + ", " +
Json("failures", errorCount.load()) + ", " +
Json("successes", successCount.load()) + ", " + Writer() +
(fvt_controller_ != nullptr
? ", " + Json("meanFreq", fvt_controller_->GetMeanFreqMhz()) +
", " + Json("maxFreq", fvt_controller_->max_mHz())
: ""));
if (do_avx_heavy) {
const std::string e = avx.MaybeGoHot();
if (!e.empty()) {
LOG(ERROR) << Jfail(e, Writer() + ", " + Turbo());
errorCount++;
}
}
#ifdef USE_BORINGSSL
if (do_ssl_self_check && BORINGSSL_self_test() == 0) {
LOG(ERROR) << Jfail("BORINGSSL_self_test", Writer() + ", " + Turbo());
errorCount++;
continue;
}
#endif
const bool madvise =
do_madvise && std::uniform_int_distribution<int>(0, 1)(rndeng_);
const size_t bufsize =
std::uniform_int_distribution<size_t>(kBufMin, kBufMax)(rndeng_);
auto &gen = kGenerators[Gen()];
auto &comp = compressors[Comp()];
const MalignBuffer::PunchedHole hole = PunchedHole(bufsize);
const MalignBuffer::CopyMethod copy_method = CopyMethod();
const bool use_repstos =
do_repstosb && std::uniform_int_distribution<int>(0, 1)(rndeng_);
auto BlockSummary = [&]() {
std::stringstream block_summary;
block_summary
<< Json("pattern", gen.name) << ", "
<< Json("copy", MalignBuffer::ToString(copy_method)) << ", "
<< Json("memset", use_repstos ? "rep;sto" : "memset") << ", "
<< JsonBool("madvise", madvise) << ", "
<< Json("size", bufsize) << ", "
<< Json("pid", pid_) << ", "
<< Json("round", round_) << ", "
<< hole.ToString();
return block_summary.str();
};
auto WriterInfo = [&](){
return Writer() + ", " + Turbo() + ", " + BlockSummary();
};
if (round_ > 1) {
uint64_t slots_read = 0;
uint64_t errs_this_round = 0;
const uint64_t kErrLimit = 20;
for (size_t k = 0; k < Silkscreen::kSize; k++) {
const std::string err = silkscreen_->CheckMySlot(tid_, round_ - 1, k);
if (!err.empty()) {
slots_read++;
if (err != "=") {
errs_this_round++;
if (errs_this_round <= kErrLimit) {
errorCount++;
LOG(ERROR) << Jfail("Silkscreen", JsonRecord("syndrome", err) +
", " + WriterInfo());
} else {
// When Silkscreen fails, it often fails, on several bad
// machines, in a surprising way: all slots owned by the
// reading tid are corrupt, in a way that suggests the
// previous round's writes never happened. Weird, and
// deserves some study, but meanwhile the log spew is
// suppressed.
}
}
}
}
if (errs_this_round > kErrLimit) {
LOG(ERROR) << Jfail(
"Silkscreen",
JsonRecord("syndrome", Json("many_errors", errs_this_round) + ", " +
Json("slots_read", slots_read)) +
", " + WriterInfo());
errorCount++;
}
if (expected_slot_count != slots_read) {
LOG(ERROR) << Jfail("Silkscreen",
Json("read", slots_read) + ", " +
Json("expected", expected_slot_count) + ", " +
WriterInfo());
errorCount++;
}
}
const uint64_t slots_written = silkscreen_->WriteMySlots(tid_, round_);
if (!expected_slot_count) {
expected_slot_count = slots_written;
}
if (expected_slot_count != slots_written) {
LOG(ERROR) << Jfail("Silkscreen",
Json("written", slots_written) + ", " +
Json("expected", expected_slot_count) + ", " +
WriterInfo());
errorCount++;
}
if (do_avx_heavy) {
// If we previously went AVX-heavy, run the AVX-heavy code again to try
// to spike current.
const std::string e = avx.BurnIfAvxHeavy();
if (!e.empty()) {
LOG(ERROR) << Jfail(e, Writer() + ", " + Turbo());
errorCount++;
}
}
const auto buffer_seed = rndeng_();
if (!b->original) b->Alloc(&b->original);
b->original->Initialize(Alignment(), bufsize);
const FloatingPointResults f =
GenerateData(gen, hole, buffer_seed,
copy_method, use_repstos, b->original.get());
MaybeFlush(*b->original);
MalignBuffer* head = b->original.get();
if (do_hashes) {
hash_choice =
std::uniform_int_distribution<size_t>(0, hashers.size() - 1)(rndeng_);
hash_value = hashers[hash_choice].func(*head);
}
if (do_compress) {
// Run our randomly chosen compressor.
if (!b->compressed) b->Alloc(&b->compressed);
b->compressed->Initialize(Alignment(), bufsize);
MaybeFlush(*b->compressed);
const int err = comp.enc(b->compressed.get(), *head);
LOG(DEBUG) << WriterInfo()
<< " original->size(): " << head->size()
<< ", compressed.size(): " << b->compressed->size() << ".";
if (err) {
LOG(ERROR) << Jfail("Compression",
Json("syndrome", err) + ", " + WriterInfo());
errorCount++;
continue;
}
MaybeFlush(*b->compressed);
head = b->compressed.get();
LOG(DEBUG) << WriterInfo() << "compress done.";
if (exiting) break;
}
const unsigned char key[33] = "0123456789abcdef0123456789abcdef";
const std::string ivec(b->original->data(), kBufMin);
unsigned char gmac[16];
const MalignBuffer* const unencrypted = head;
if (do_encrypt) {
// Encrypt.
if (!b->encrypted) b->Alloc(&b->encrypted);
b->encrypted->Initialize(Alignment(), head->size());
MaybeFlush(*b->encrypted);
int enc_len = 0, enc_unused_len = 0;
EVP_CipherInit_ex(cipher_ctx, EVP_aes_256_gcm(), NULL, key,
(unsigned char *)ivec.data(), 1);
if (madvise) MadviseDontNeed(*b->encrypted);
if (EVP_CipherUpdate(
cipher_ctx, (unsigned char *)b->encrypted->data(), &enc_len,
(unsigned char *)head->data(), head->size()) != 1) {
LOG(ERROR) << Jfail("EVP_CipherUpdate", WriterInfo());
errorCount++;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
continue;
}
if (EVP_CipherFinal_ex(cipher_ctx, nullptr, &enc_unused_len) != 1) {
LOG(ERROR) << Jfail("encrypt_EVP_CipherFinal_ex", WriterInfo());
errorCount++;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
continue;
}
enc_len += enc_unused_len;
if (enc_len != (int)b->encrypted->size()) {
std::stringstream ss;
ss << "enc_length: " << enc_len << " vs: " << b->encrypted->size();
LOG(ERROR) << Jfail("encrypt_length_mismatch",
Json("syndrome", ss.str()) + ", " + WriterInfo());
errorCount++;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
continue;
}
if (EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_GCM_GET_TAG, sizeof(gmac),
gmac) != 1) {
LOG(ERROR) << Jfail("EVP_CTRL_GCM_GET_TAG", WriterInfo());
errorCount++;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
continue;
}
EVP_CIPHER_CTX_cleanup(cipher_ctx);
MaybeFlush(*b->encrypted);
head = b->encrypted.get();
LOG(DEBUG) << "Encrypt done " << WriterInfo();
if (exiting) break;
}
// Make a copy.
if (!b->copied) b->Alloc(&b->copied);
b->copied->Initialize(Alignment(), head->size());
MaybeFlush(*b->copied);
if (madvise) MadviseDontNeed(*b->copied);
std::string syndrome = b->copied->CopyFrom(*head, copy_method);
if (!syndrome.empty()) {
LOG(ERROR) << Jfail(
"writer-detected-copy",
JsonRecord("syndrome", syndrome) + ", " + WriterInfo());
errorCount++;
continue;
}
MaybeFlush(*b->copied);
// Switch to an alternate CPU.
int newcpu = tid_;
if (do_hop && std::thread::hardware_concurrency() > 1) {
std::vector<int> cpus;
for (int i : tid_list_) {
if (i == tid_) continue;
cpus.push_back(i);
}
if (!cpus.empty()) {
int cpuoff =
std::uniform_int_distribution<int>(0, cpus.size() - 1)(rndeng_);
newcpu = cpus[cpuoff];
cpus.erase(cpus.begin() + cpuoff);
if (!SetAffinity(newcpu)) {
// Tough luck, can't run on chosen CPU.
// Validate on same cpu we were on.
newcpu = tid_;
}
}
}
auto Reader = [&Tid, &newcpu](){
return "\"reader\": " + Tid(newcpu);
};
auto WriterReaderInfo = [&](){
return
Writer() + ", " + Reader() + ", " + Turbo() + ", " + BlockSummary();
};
// Re-verify buffer copy
syndrome = b->copied->Syndrome(*head);
if (!syndrome.empty()) {
LOG(ERROR) << Jfail(
"copy", JsonRecord("syndrome", syndrome) + ", " + WriterInfo());
errorCount++;
continue;
}
MaybeFlush(*b->copied);
head = b->copied.get();
if (do_encrypt) {
// Decrypt.
if (!b->decrypted) b->Alloc(&b->decrypted);
b->decrypted->Initialize(Alignment(), head->size());
MaybeFlush(*b->decrypted);
int dec_len = 0, dec_extra_len = 0;
EVP_CipherInit_ex(cipher_ctx, EVP_aes_256_gcm(), NULL, key,
(unsigned char *)ivec.data(), 0);
if (madvise) MadviseDontNeed(*b->decrypted);
if (EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_GCM_SET_TAG, sizeof(gmac),
gmac) != 1) {
LOG(ERROR) << Jfail("EVP_CTRL_GCM_SET_TAG", WriterReaderInfo());
errorCount++;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
continue;
}
if (EVP_CipherUpdate(
cipher_ctx, (unsigned char *)b->decrypted->data(), &dec_len,
(unsigned char *)head->data(), head->size()) != 1) {
LOG(ERROR) << Jfail("decryption", WriterReaderInfo());
errorCount++;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
continue;
}
if (EVP_CipherFinal_ex(
cipher_ctx, (unsigned char *)(b->decrypted->data() + dec_len),
&dec_extra_len) != 1) {
LOG(ERROR) << Jfail("decrypt_EVP_CipherFinal_ex", WriterReaderInfo());
errorCount++;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
continue;
}
dec_len += dec_extra_len;
EVP_CIPHER_CTX_cleanup(cipher_ctx);
if (dec_len != (int)b->decrypted->size()) {
std::stringstream ss;
ss << "dec_length: " << dec_len << " vs: " << b->decrypted->size();
LOG(ERROR) << Jfail("decrypt_length_mismatch",
Json("syndrome", ss.str()) + ", " +
WriterReaderInfo());
errorCount++;
continue;
}
MaybeFlush(*b->decrypted);
head = b->decrypted.get();
syndrome = unencrypted->Syndrome(*head);
if (!syndrome.empty()) {
LOG(ERROR) << Jfail(
"decryption_mismatch",
JsonRecord("syndrome", syndrome) + ", " + WriterReaderInfo());
errorCount++;
continue;
}
LOG(DEBUG) << WriterReaderInfo() << " decrypt done";
if (exiting) break;
}
if (do_compress) {
// Run decompressor.
if (!b->decompressed) b->Alloc(&b->decompressed);
b->decompressed->Initialize(Alignment(), bufsize);
MaybeFlush(*b->decompressed);
if (madvise) MadviseDontNeed(*b->decompressed);
const int err = comp.dec(b->decompressed.get(), *head);
if (err) {
LOG(ERROR) << Jfail("uncompression",
Json("syndrome", err) + ", " + WriterReaderInfo());
errorCount++;
continue;
}
if (b->decompressed->size() != bufsize) {
std::stringstream ss;
ss << "dec_length: " << b->decompressed->size() << " vs: " << bufsize;
LOG(ERROR) << Jfail(
"decompressed_size",
Json("syndrome", ss.str()) + ", " + WriterReaderInfo());
errorCount++;
continue;
}
MaybeFlush(*b->decompressed);
head = b->decompressed.get();
LOG(DEBUG) << WriterReaderInfo() << " uncompress done";
}
if (!b->re_made) b->Alloc(&b->re_made);
b->re_made->Initialize(Alignment(), bufsize);
const FloatingPointResults f_r =
GenerateData(gen, hole, buffer_seed,
copy_method, use_repstos, b->re_made.get());
syndrome = b->original->Syndrome(*b->re_made);
if (!syndrome.empty()) {
LOG(ERROR) << Jfail("re-make", JsonRecord("syndrome", syndrome) + ", " +
WriterReaderInfo());
errorCount++;
continue;
}
if (f != f_r) {
std::stringstream ss;
ss << "Was: " << f.d << " Is: " << f_r.d;
LOG(ERROR) << Jfail(
"fp-double", Json("syndrome", ss.str()) + ", " + WriterReaderInfo());
errorCount++;
continue;
}
if (do_hashes) {
// Re-run hash func.
std::string hash = hashers[hash_choice].func(*head);
if (hash_value != hash) {
std::stringstream ss;
ss << "hash was: " << hash_value << " is: " << hash;
LOG(ERROR) << Jfail(
"hash", Json("syndrome", ss.str()) + ", " + WriterReaderInfo());
errorCount++;
continue;
}
LOG(DEBUG) << WriterReaderInfo() << " rehash done";
}
// Release MalignBuffer memory allocations.
if (do_madvise) b.reset(nullptr);
successCount++;
}
EVP_CIPHER_CTX_free(cipher_ctx);
LOG(INFO) << "tid " << tid_ << " exiting.";
}
static void UsageIf(bool v) {
if (!v) return;
LOG(ERROR) << "Usage cpu_check [-a] [-b] [-c] [-d] [-e] [-F] [-h]"
<< " [-m] [-nN] [-p] [-qNNN] [-r] [-x] [-X] [-s] [-Y] [-H] [-kXXX]"
<< " [-z] [-cn,n,...,n] [-fMinF[-MaxF]] [-tNNN] [-u]"
<< "\n a: Do not misalign"
<< "\n b: Do not run BoringSSL self check"
<< "\n c: Explicit list of CPUs"
<< "\n d: Do not rep stosb"
<< "\n e: Do not encrypt"
<< "\n f: Fixed specified turbo frequency (multiple of 100)"
<< "\n g: Do not touch frequency, voltage and thermal controls"
<< "\n F: Randomly flush caches (inverted option)"
<< "\n h: Do not hash"
<< "\n m: Do not madvise, do not malloc per iteration"
<< "\n l: Do not provoke heavy AVX power fluctuations"
<< "\n n: Generate noise"
<< "\n N: Generate noise, invert -c"
<< "\n p: Corrupt data provenance"
<< "\n q: Quit if more than N errors"
<< "\n r: Do not repmovsb"
<< "\n t: Timeout in seconds"
<< "\n x: Do not use AVX:256"
<< "\n X: Do use AVX512"
<< "\n s: Do not switch CPUs for verification"
<< "\n u: Do not use fast string ops"
<< "\n Y: Do frequency sweep"
<< "\n H: Slam between low and high frequency"
<< "\n k: Frequency step period (default 300)"
<< "\n z: Do not compress/uncompress";
exit(2);
}
int main(int argc, char **argv) {
std::vector<int> tid_list;
int64_t timeout = 0;
#ifdef IN_GOOGLE3
// Initialize the symbolizer to get a human-readable stack trace.
absl::InitializeSymbolizer(argv[0]);
absl::FailureSignalHandlerOptions options;
absl::InstallFailureSignalHandler(options);
#endif
for (int i = 1; i < argc; i++) {
const char *flag = argv[i];
UsageIf(flag[0] != '-');
for (flag++; *flag != 0; flag++) {
switch (*flag) {
case 'a':
do_misalign = false;
break;
case 'b':
do_ssl_self_check = false;
break;
case 'c':
{
std::string c = "";
for (flag++; *flag != 0; flag++) {
c += *flag;
}
std::stringstream s(c);
int t;
while (s >> t) {
tid_list.push_back(t);
if (s.peek() == ',') s.ignore();
}
}
break;
case 'd':
do_repstosb = false;
break;
case 'e':
do_encrypt = false;
break;
case 'f':
{
std::string c(++flag);
flag += c.length();
std::stringstream s(c);
s >> fixed_min_frequency;
fixed_max_frequency = fixed_min_frequency;
if (s.get() == '-') {
s >> fixed_max_frequency;
do_freq_sweep = true;
}
}
break;
case 'F':
do_flush = true;
break;
case 'g':
do_fvt = false;
break;
case 'H':
do_freq_hi_lo = true;
break;
case 'h':
do_hashes = false;
break;
case 'l':
do_avx_heavy = false;
break;
case 'm':
do_madvise = false;
break;
case 'n':
do_noise = true;
break;
case 'N':
do_noise = true;
do_invert_cores = true;
do_encrypt = false;
do_hashes = false;
do_compress = false;
do_madvise = false;
do_hop = false;
break;
case 'p':
do_provenance = true;
do_encrypt = false;
do_hashes = false;
do_compress = false;
break;
case 'q':
{
std::string c(++flag);
flag += c.length();
std::stringstream s(c);
s >> error_limit;
}
break;
case 'r':
do_repmovsb = false;
break;
case 's':
do_hop = false;
break;
case 'u':
do_fast_string_ops = false;
break;
case 'x':
do_avx_256_memcpy = false;
break;
case 'X':
do_avx_512_memcpy = true;
break;
case 'Y':
do_freq_sweep = true;
break;
case 'k':
{
std::string c(++flag);
flag += c.length();
std::stringstream s(c);
s >> seconds_per_freq;
}
break;
case 't':
{
std::string c(++flag);
flag += c.length();
std::stringstream s(c);
s >> timeout;
}
break;
case 'z':
do_compress = false;
break;
default:
UsageIf(true);
}
if (*flag == 0) break;
}
}
LOG(INFO) << "Starting " << argv[0] << " version " cpu_check_VERSION
<< (do_misalign ? "" : " No misalign ")
<< (do_repstosb ? "" : " No repstosb")
<< (!do_flush ? "" : " Cache-line flush ")
<< (do_encrypt ? "" : " No encryption ")
<< (do_hashes ? "" : " No hash ")
<< (do_madvise ? "" : " No madvise ")
<< (do_provenance ? " Provenance " : "")
<< (do_repmovsb ? "" : " No repmovsb ")
<< (do_sse_128_memcpy ? "" : " No SSE:128 ")
<< (do_avx_256_memcpy ? "" : " No AVX:256 ")
<< (do_avx_512_memcpy ? "" : " No AVX:512 ")
<< (do_avx_heavy ? " AVX_heavy " : "")
<< (do_compress ? "" : " No compression ")
<< (do_hop ? "" : " No thread_switch ")
<< (do_ssl_self_check ? "" : " No BoringSSL self check")
<< (!do_freq_sweep ? "" : " FrequencySweep ")
<< (!do_freq_hi_lo ? "" : " FreqHiLo ")
<< (do_noise ? " NOISE" : "")
<< (do_fast_string_ops ? "" : " No FastStringOps");
std::vector<std::thread *> threads;
std::vector<Worker *> workers;
std::vector<std::string> words = ReadDict();
if (words.empty()) {
LOG(ERROR) << "No word list found.";
exit(1);
}
int cpus = std::thread::hardware_concurrency();
LOG(INFO) << "Detected hardware concurrency: " << cpus;
if (do_invert_cores) {
// Clumsily complement the tid_list in -N mode.
std::vector<int> v;
for (int i = 0; i < cpus; i++) {
if (std::find(tid_list.begin(), tid_list.end(), i) == tid_list.end()) {
v.push_back(i);
}
}
tid_list = v;
}
if (tid_list.empty()) {
for (int i = 0; i < cpus; i++) {
tid_list.push_back(i);
}
} else {
for (int t : tid_list) {
LOG(INFO) << "Explicitly testing cpu: " << t;
}
}
// Silkscreen instance shared by all threads.
Silkscreen silkscreen(tid_list);
for (int tid : tid_list) {
workers.push_back(new Worker(getpid(), &words, tid_list, tid, &silkscreen));
threads.push_back(new std::thread(&Worker::Run, workers.back()));
}
signal(SIGTERM, [](int) { exiting = true; });
signal(SIGINT, [](int) { exiting = true; });
struct timeval last_cpu = {0, 0};
double last_time = t0;
while (!exiting) {
sleep(60);
struct rusage ru;
double secs = TimeInSeconds();
if (timeout > 0 && secs >= t0 + timeout) {
exiting = true;
}
double secondsPerError = (secs - t0) / errorCount.load();
if (getrusage(RUSAGE_SELF, &ru) == -1) {
LOG(ERROR) << "getrusage failed: " << strerror(errno);
} else {
float cpu = (((ru.ru_utime.tv_sec - last_cpu.tv_sec) * 1000000.0) +
(ru.ru_utime.tv_usec - last_cpu.tv_usec)) /
1000000.0;
LOG(INFO) << "Errors: " << errorCount.load()
<< " Successes: " << successCount.load()
<< " CPU " << cpu / (secs - last_time) << " s/s"
<< " Seconds Per Error: " << secondsPerError;
last_cpu = ru.ru_utime;
}
last_time = secs;
}
// shutting down.
for (auto &t : threads) {
t->join();
delete t;
}
for (auto w : workers) {
delete w;
}
LOG(ERROR) << errorCount.load() << " ERRORS, " << successCount.load()
<< " SUCCESSES.";
LOG(INFO) << "Exiting.";
exit(errorCount != 0);
}