Raptor 3.0.0-rc.1
A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences
 
index_upgrader.hpp
Go to the documentation of this file.
1// --------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
6// --------------------------------------------------------------------------------------------------
7
13#pragma once
14
16#include <raptor/index.hpp>
17
18namespace raptor
19{
20
21template <seqan3::data_layout data_layout_mode_ = seqan3::data_layout::uncompressed>
23{
24public:
// Path of the index file that upgrade() opens (binary mode) and reads from.
25 std::string index_file{};
// Path of the file that upgrade() opens (binary mode) and writes the index to.
26 std::string output_file{};
// False positive rate stored into the index by upgrade(). If it is NaN when upgrade() runs,
// it is replaced by the result of compute_fpr().
27 double fpr{};
// Passed as the `count` argument to compute_fpr() when the FPR has to be computed.
28 size_t max_count{};
29
// All special member functions are explicitly defaulted.
// Default construction value-initialises every data member via its `{}` initialiser.
30 index_upgrader() = default;
31 index_upgrader(index_upgrader const &) = default;
32 index_upgrader(index_upgrader &&) = default;
33 index_upgrader & operator=(index_upgrader const &) = default;
34 index_upgrader & operator=(index_upgrader &&) = default;
35 ~index_upgrader() = default;
36
// Constructs an upgrader from parsed arguments.
//   arguments: supplies index_file, output_file and fpr, which are copied into the members of the same name.
//   max_count: stored in the max_count member.
// The parameter `max_count` shadows the member of the same name. Inside the initialiser braces the
// name refers to the parameter, so the member is initialised from the argument.
37 explicit index_upgrader(upgrade_arguments const & arguments, size_t const max_count) :
38 index_file{arguments.index_file},
39 output_file{arguments.output_file},
40 fpr{arguments.fpr},
41 max_count{max_count}
42 {}
43
// Loads an index from index_file, sets its FPR and non-HIBF flag, and serialises it to output_file.
// NOTE(review): the declaration of `index` is not visible in this listing (listing line 46 is absent);
// presumably it is a local raptor index object declared right after the opening brace — confirm.
44 void upgrade()
45 {
// Inner scope: the input stream and archive are destroyed at its closing brace, before any output happens.
47 {
48 std::ifstream is{index_file, std::ios::binary};
49 cereal::BinaryInputArchive iarchive{is};
50 index.load_old_index(iarchive);
51 }
// A NaN fpr means no usable value is set. In that case derive it from the loaded IBF's
// hash function count and bin size together with max_count.
52 if (std::isnan(fpr))
53 fpr = compute_fpr(index.ibf().hash_function_count(), max_count, index.ibf().bin_size());
54 index.fpr_ = fpr;
55 std::cout << "FPR for " << index_file << ": " << fpr << '\n';
56 index.is_hibf_ = false;
// NOTE(review): neither stream is checked for a successful open in this function;
// confirm that failures surface through the cereal archives.
57 std::ofstream os{output_file, std::ios::binary};
58 cereal::BinaryOutputArchive oarchive{os};
59 oarchive(index);
60 }
61
/*!\brief Computes the false positive rate `(1 - e^(-h * n / m))^h`.
 * \param hash_fun The number of hash functions `h`.
 * \param count    The number of elements `n`.
 * \param bin_size The bin size `m`.
 * \returns The false positive rate as a double.
 * \details
 * This is the usual Bloom filter false-positive approximation. A `count` of 0 with a non-zero
 * `hash_fun` and `bin_size` yields 0. Degenerate inputs for which the formula is undefined
 * (e.g. `hash_fun == 0`, or `count == 0` together with `bin_size == 0`) yield NaN.
 */
static double compute_fpr(size_t const hash_fun, size_t const count, size_t const bin_size)
{
    // Convert before multiplying: the product hash_fun * count could wrap around in size_t.
    double const hash_count = static_cast<double>(hash_fun);
    double const exp_arg = hash_count * static_cast<double>(count) / static_cast<double>(bin_size);
    // -expm1(-x) equals 1 - exp(-x), but does not cancel to exactly 0 when x is very small.
    double const log_arg = -std::expm1(-exp_arg);
    return std::exp(hash_count * std::log(log_arg));
}
68};
69
70} // namespace raptor
Definition: index_upgrader.hpp:23
Definition: index.hpp:54
T exp(T... args)
Provides raptor::raptor_index.
T isnan(T... args)
T log(T... args)
Definition: upgrade_arguments.hpp:24
Provides raptor::upgrade_arguments.