-
Notifications
You must be signed in to change notification settings - Fork 12.3k
imatrix: add option to display importance score statistics for a given imatrix file #12718
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
d8e902e
f46693b
b3ac78b
dc3373e
0589c3e
e1fd1af
490a8fe
62ac268
73d8ecb
200d88c
0b7f9c4
52e86e2
91d48da
755c1ef
72a5ec1
5cd20e4
1dbe6c3
bb47f0d
a3ac66c
3eb556e
0276d71
1f8dc23
8ecd5fa
8302a8a
bfc0dfc
5cfc443
280dfdd
235442a
c823d16
a5c4640
655be19
23ecca8
a4166a8
ed4ba31
19f8e15
f5fd2b7
bc3bd57
c3ede42
1389753
fde3089
c5a3d0a
688d0c2
b1c481a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -13,18 +13,20 @@ | |||||
#include <vector> | ||||||
#include <fstream> | ||||||
#include <unordered_map> | ||||||
#include <map> | ||||||
#include <algorithm> | ||||||
#include <regex> | ||||||
#include <numeric> | ||||||
|
||||||
#if defined(_MSC_VER) | ||||||
#pragma warning(disable: 4244 4267) // possible loss of data | ||||||
#endif | ||||||
|
||||||
// Print the command-line usage summary for the imatrix tool.
// The first parameter (argc) is unused; argv[0] supplies the program name.
static void print_usage(int, char ** argv) {
    LOG("\nexample usage:\n");
    LOG("\n    %s -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output]\n"
        "       [--chunk 123] [--output-frequency 10] [--save-frequency 0] [--show-statistics]\n"
        "       [--no-ppl] [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n"
        "       [--parse-special]\n" , argv[0]);
    LOG("\n");
}
|
@@ -35,13 +37,29 @@ struct Stats { | |||||
int ncall = 0; | ||||||
}; | ||||||
|
||||||
struct tensor_statistics { | ||||||
std::string tensor; | ||||||
Stats stats; | ||||||
float total_sqract = 0.0f; | ||||||
float mean_sqract = 0.0f; | ||||||
float max_sqract = 0.0f; | ||||||
float min_sqract = 0.0f; | ||||||
int elements = 0; | ||||||
float stddev = 0.0f; | ||||||
float active = 0.0f; | ||||||
float entropy = 0.0f; | ||||||
float zd = 0.0f; | ||||||
float cossim = 0.0f; | ||||||
}; | ||||||
|
||||||
class IMatrixCollector { | ||||||
public: | ||||||
IMatrixCollector() = default; | ||||||
void set_params(common_params params) { m_params = std::move(params); } | ||||||
bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data); | ||||||
void save_imatrix(int ncall = -1) const; | ||||||
bool load_imatrix(const char * fname); | ||||||
const std::unordered_map<std::string, Stats> & get_mstats() const { return m_stats; } | ||||||
private: | ||||||
std::unordered_map<std::string, Stats> m_stats; | ||||||
common_params m_params; | ||||||
|
@@ -70,6 +88,121 @@ static std::string filter_tensor_name(const char * name) { | |||||
return wname; | ||||||
} | ||||||
|
||||||
// Split a dotted tensor name such as "blk.12.attn_k.weight" into its layer
// index ("12", written to `layer`) and its short tensor name ("attn_k",
// written to `tensor`). When no "blk.<n>" component exists, `layer` is set to
// "-"; when no "<name>.weight" component exists, `tensor` falls back to the
// whole input string.
static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) {
    std::vector<std::string> name;
    std::istringstream stream(input);
    std::string item;

    // tokenize on '.'
    while (std::getline(stream, item, '.')) {
        name.push_back(item);
    }
    // the layer index is the component following "blk"
    for (size_t i = 0; i < name.size(); ++i) {
        if (name[i] == "blk" && i + 1 < name.size()) {
            layer = name[i + 1];
            break;
        }
    }
    // the short tensor name is the component preceding "weight"
    for (size_t i = 0; i < name.size(); ++i) {
        if (name[i] == "weight" && i > 0) {
            tensor = name[i - 1];
            break;
        }
    }

    if (tensor.empty()) {
        tensor = input;
    }
    if (layer.empty()) {
        layer = "-";
    }
}
|
||||||
static void compute_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) { | ||||||
if (e.values.size() != e.counts.size()) { | ||||||
LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.values.size()); | ||||||
return; | ||||||
} | ||||||
if (e.counts.empty()) { | ||||||
LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str()); | ||||||
return; | ||||||
} | ||||||
|
||||||
const int size = e.counts.size(); | ||||||
std::vector<float> activations; | ||||||
activations.reserve(size); | ||||||
for (int i = 0; i < size; i++) { | ||||||
activations.push_back(e.values[i] / e.counts[i]); | ||||||
} | ||||||
|
||||||
const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f); | ||||||
const float act_max = *std::max_element(activations.begin(), activations.end()); | ||||||
const float act_min = *std::min_element(activations.begin(), activations.end()); | ||||||
const float act_mean = act_total / activations.size(); | ||||||
const float act_sqr_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f); | ||||||
const float act_var = (act_sqr_total / activations.size()) - (act_mean * act_mean); | ||||||
const float act_dev = std::sqrt(std::max(0.0f, act_var)); | ||||||
float threshold = 1e-5f; | ||||||
const int inactive_count = std::count_if(activations.begin(), activations.end(), | ||||||
[threshold](const float v) { return fabs(v) <= threshold; }); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(Otherwise there's a |
||||||
const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size(); | ||||||
|
||||||
float entropy = 0; | ||||||
if (act_total > 0) { | ||||||
for (const auto act : activations) { | ||||||
if (const float p = act / act_total; p > 0) { | ||||||
entropy -= p * std::log2(p); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
int z_score = 0; | ||||||
if (act_dev > 0.0f) { | ||||||
for (const auto act : activations) { | ||||||
if (const float p = (act - act_mean) / act_dev; p > 1) { | ||||||
z_score++; | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
auto & ts = tstats.emplace_back(); | ||||||
ts.tensor = name; | ||||||
ts.stats = e; | ||||||
ts.total_sqract = act_total; | ||||||
ts.mean_sqract = act_mean; | ||||||
ts.max_sqract = act_max; | ||||||
ts.min_sqract = act_min; | ||||||
ts.elements = static_cast<int>(activations.size()); | ||||||
ts.stddev = act_dev; | ||||||
ts.active = active_ratio; | ||||||
ts.entropy = entropy; | ||||||
ts.zd = static_cast<float>(z_score) / ts.elements; | ||||||
} | ||||||
|
||||||
static void compute_cossim(std::vector<tensor_statistics> & tstats) { | ||||||
static const std::regex pattern(R"(blk\.(\d+)\.)"); | ||||||
for (auto & ts : tstats) { | ||||||
if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) { | ||||||
const int blk = std::stoi(match[1]); | ||||||
std::string tname(ts.tensor); | ||||||
tname.replace(match.position(1), match.length(1), std::to_string(blk-1)); | ||||||
auto prev = std::find_if(tstats.begin(), tstats.end(), | ||||||
[tname](const tensor_statistics & t) { return t.tensor == tname; }); | ||||||
if (prev != tstats.end()) { | ||||||
const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), | ||||||
prev->stats.values.begin(), 0.0f); | ||||||
const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), | ||||||
ts.stats.values.begin(), 0.0f)); | ||||||
const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(), | ||||||
prev->stats.values.begin(), 0.0f)); | ||||||
const float cs = dp / (curr_mag * prev_mag); | ||||||
ts.cossim = cs; | ||||||
} | ||||||
} else { | ||||||
ts.cossim = 0; | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) { | ||||||
GGML_UNUSED(user_data); | ||||||
|
||||||
|
@@ -304,6 +437,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) { | |||||
LOG_ERR("%s: no data in file %s\n", __func__, fname); | ||||||
return false; | ||||||
} | ||||||
|
||||||
for (int i = 0; i < n_entries; ++i) { | ||||||
int len; in.read((char *)&len, sizeof(len)); | ||||||
std::vector<char> name_as_vec(len+1); | ||||||
|
@@ -338,14 +472,14 @@ bool IMatrixCollector::load_imatrix(const char * fname) { | |||||
return false; | ||||||
} | ||||||
|
||||||
// Recreate the state as expected by save_imatrix(), and corerct for weighted sum. | ||||||
// Recreate the state as expected by save_imatrix(), and correct for weighted sum. | ||||||
for (int i = 0; i < nval; i++) { | ||||||
e.values[i] += tmp[i]; | ||||||
e.counts[i] += ncall; | ||||||
} | ||||||
e.ncall += ncall; | ||||||
|
||||||
} | ||||||
|
||||||
return true; | ||||||
} | ||||||
|
||||||
|
@@ -355,7 +489,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat | |||||
return g_collector.collect_imatrix(t, ask, user_data); | ||||||
} | ||||||
|
||||||
|
||||||
struct results_log_softmax { | ||||||
double log_softmax; | ||||||
float logit; | ||||||
|
@@ -580,22 +713,132 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) { | |||||
return true; | ||||||
} | ||||||
|
||||||
static bool show_statistics(const common_params & params) { | ||||||
std::vector<tensor_statistics> ts; | ||||||
if (params.in_files.empty() || params.in_files.size() > 1) { | ||||||
LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n"); | ||||||
return false; | ||||||
} | ||||||
if (g_collector.load_imatrix(params.in_files[0].c_str())) { | ||||||
for (const auto & [name, stats] :g_collector.get_mstats()) { | ||||||
compute_statistics(ts, name, stats); | ||||||
} | ||||||
} else { | ||||||
LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str()); | ||||||
return false; | ||||||
} | ||||||
if (!ts.empty()) { | ||||||
compute_cossim(ts); | ||||||
} else { | ||||||
LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str()); | ||||||
return false; | ||||||
} | ||||||
|
||||||
struct tensor_comparer { | ||||||
bool operator()(const tensor_statistics & a, const tensor_statistics & b) const { | ||||||
std::string layer, name_a, name_b; | ||||||
; | ||||||
process_tensor_name(a.tensor, layer, name_a); | ||||||
process_tensor_name(b.tensor, layer, name_b); | ||||||
return name_a < name_b || (name_a == name_b && a.total_sqract > b.total_sqract); | ||||||
} | ||||||
}; | ||||||
std::sort(ts.begin(), ts.end(), tensor_comparer()); | ||||||
|
||||||
struct weighted_stats { | ||||||
float weighted_bias = 0.0f; | ||||||
float weighted_zd = 0.0f; | ||||||
float weighted_cossim = 0.0f; | ||||||
int total_elements = 0; | ||||||
}; | ||||||
std::map<int, weighted_stats> ws; | ||||||
|
||||||
LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size())); | ||||||
LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", " Layer", " Tensor", " Σ(Act²)", | ||||||
" Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD", | ||||||
" CosSim"); | ||||||
LOG_INF( | ||||||
"==============================================================================================================" | ||||||
"===========================================================\n"); | ||||||
for (const auto & tstat : ts) { | ||||||
std::string layer, name; | ||||||
process_tensor_name(tstat.tensor, layer, name); | ||||||
|
||||||
int blk; | ||||||
try { | ||||||
blk = std::stoi(layer); | ||||||
} catch (const std::exception & e) { | ||||||
blk = -1; // not a block layer | ||||||
} | ||||||
|
||||||
LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n", | ||||||
layer.c_str(), name.c_str(), tstat.total_sqract, tstat.min_sqract, tstat.max_sqract, tstat.mean_sqract, | ||||||
tstat.stddev, tstat.active * 100.0f, tstat.elements, tstat.entropy, | ||||||
100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * tstat.zd, tstat.cossim); | ||||||
|
||||||
const float weighted_bias = tstat.elements * tstat.total_sqract; | ||||||
const float weighted_zd = tstat.elements * tstat.zd; | ||||||
const float weighted_cossim = tstat.elements * tstat.cossim; | ||||||
|
||||||
if (ws.find(blk) != ws.end()) { | ||||||
ws[blk].weighted_bias += weighted_bias; | ||||||
ws[blk].weighted_zd += weighted_zd; | ||||||
ws[blk].weighted_cossim += weighted_cossim; | ||||||
ws[blk].total_elements += tstat.elements; | ||||||
} else { | ||||||
weighted_stats temp_ws; | ||||||
temp_ws.weighted_bias = weighted_bias; | ||||||
temp_ws.weighted_zd = weighted_zd; | ||||||
temp_ws.weighted_cossim = weighted_cossim; | ||||||
temp_ws.total_elements = tstat.elements; | ||||||
ws[blk] = temp_ws; | ||||||
} | ||||||
} | ||||||
|
||||||
const int layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; }); | ||||||
LOG_INF("\nComputing weighted average statistics per layer (%d layers)\n", layers); | ||||||
LOG_INF("\n%s\t%s\t%s\t%s\n", " Layer", " μΣ(Act²)", " μZD", "μCosSim"); | ||||||
LOG_INF("================================================\n"); | ||||||
for (const auto & [first, second] : ws) { | ||||||
const auto & layer = first; | ||||||
const auto & stats = second; | ||||||
|
||||||
if (stats.total_elements == 0) { | ||||||
continue; | ||||||
} | ||||||
|
||||||
if (layer >= 0) { | ||||||
const float bias = stats.weighted_bias / stats.total_elements; | ||||||
const float zd = stats.weighted_zd / stats.total_elements; | ||||||
const float cossim = stats.weighted_cossim / stats.total_elements; | ||||||
|
||||||
LOG_INF("%5d\t%14.2f\t%10.4f%%\t%6.4f\n", layer, bias, 100.0f * zd, cossim); | ||||||
} | ||||||
} | ||||||
LOG_INF("\n"); | ||||||
|
||||||
return true; | ||||||
} | ||||||
|
||||||
int main(int argc, char ** argv) { | ||||||
common_params params; | ||||||
|
||||||
params.out_file = "imatrix.dat" ; | ||||||
|
||||||
params.n_ctx = 512; | ||||||
params.escape = false; | ||||||
|
||||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) { | ||||||
return 1; | ||||||
} | ||||||
|
||||||
common_init(); | ||||||
if (params.show_statistics) { | ||||||
if (!show_statistics(params)) { | ||||||
return 1; | ||||||
} | ||||||
return 0; | ||||||
} | ||||||
|
||||||
common_init(); | ||||||
params.n_batch = std::min(params.n_batch, params.n_ctx); | ||||||
|
||||||
g_collector.set_params(params); | ||||||
|
||||||
for (const auto & in_file : params.in_files) { | ||||||
|
@@ -622,7 +865,6 @@ int main(int argc, char ** argv) { | |||||
|
||||||
// init | ||||||
common_init_result llama_init = common_init_from_params(params); | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is subjective, but that empty line helped to visually separate the surrounding declarations. (I suggest bringing back this empty line.) |
||||||
llama_model * model = llama_init.model.get(); | ||||||
llama_context * ctx = llama_init.context.get(); | ||||||
|
||||||
|
@@ -655,12 +897,10 @@ int main(int argc, char ** argv) { | |||||
} | ||||||
} | ||||||
|
||||||
|
||||||
g_collector.save_imatrix(); | ||||||
|
||||||
LOG("\n"); | ||||||
llama_perf_context_print(ctx); | ||||||
|
||||||
llama_backend_free(); | ||||||
|
||||||
return 0; | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Note that #9400 makes `e.values` and `e.counts` no longer the same size, but it assumes `e.values.size() % e.counts.size() == 0`, and iteration would look like