Raptor 3.0.0-rc.1
A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences
 
index_factory.hpp
Go to the documentation of this file.
1// --------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
6// --------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <seqan3/search/views/minimiser_hash.hpp>
16
22#include <raptor/index.hpp>
23
24namespace raptor
25{
26
28{
29public:
// Default construction leaves `arguments` and `config` as nullptr (see their in-class initialisers).
30 index_factory() = default;
// Copy/move construction copies the stored pointers.
31 index_factory(index_factory const &) = default;
32 index_factory(index_factory &&) = default;
// NOTE(review): both assignment operators are declared defaulted, but `arguments` and `config` are
// const pointer members (`T const * const`), so the compiler defines these operators as deleted.
33 index_factory & operator=(index_factory const &) = default;
34 index_factory & operator=(index_factory &&) = default;
35 ~index_factory() = default;
36
37 explicit index_factory(build_arguments const & args) : arguments{std::addressof(args)}
38 {
39 if (arguments->input_is_minimiser)
41 else
42 reader = file_reader<file_types::sequence>{arguments->shape, arguments->window_size};
43 }
44
45 explicit index_factory(build_arguments const & args, partition_config const & cfg) :
46 arguments{std::addressof(args)},
47 config{std::addressof(cfg)}
48 {
49 if (arguments->input_is_minimiser)
50 reader = file_reader<file_types::minimiser>{}; // GCOVR_EXCL_LINE
51 else
52 reader = file_reader<file_types::sequence>{arguments->shape, arguments->window_size};
53 }
54
55 [[nodiscard]] raptor_index<> operator()(size_t const part = 0u) const
56 {
57 return construct(part);
58 }
59
60private:
// Address of the build arguments passed to the constructor; nullptr for a default-constructed factory.
// construct() asserts that this is non-null before using it.
61 build_arguments const * const arguments{nullptr};
// Address of the partition configuration, if one was passed to the constructor. When this is nullptr,
// construct() inserts every hash; otherwise it keeps only hashes belonging to the requested partition.
62 partition_config const * const config{nullptr};
// NOTE(review): listing line 63 is absent from this rendering. The declaration of `reader`, which the
// constructors assign and construct() reads, presumably belongs here — confirm against the original header.
64
65 raptor_index<> construct(size_t const part) const
66 {
67 assert(arguments != nullptr);
68
69 arguments->index_allocation_timer.start();
70 raptor_index<> index{*arguments};
71 arguments->index_allocation_timer.stop();
72
73 auto worker = [&](auto && zipped_view, auto &&)
74 {
75 timer<concurrent::no> local_user_bin_io_timer{};
76 timer<concurrent::no> local_fill_ibf_timer{};
77 std::vector<uint64_t> hashes{};
78 auto & ibf = index.ibf();
79
80 for (auto && [file_names, bin_number] : zipped_view)
81 {
82 hashes.clear();
83 local_user_bin_io_timer.start();
85 [&](auto const & reader)
86 {
87 if (config == nullptr)
88 reader.hash_into(file_names, std::back_inserter(hashes));
89 else
90 reader.hash_into_if(file_names,
91 std::back_inserter(hashes),
92 [&](uint64_t const hash)
93 {
94 return config->hash_partition(hash) == part;
95 });
96 },
97 reader);
98 local_user_bin_io_timer.stop();
99
100 local_fill_ibf_timer.start();
101 for (auto && value : hashes)
102 ibf.emplace(value, seqan3::bin_index{bin_number});
103 local_fill_ibf_timer.stop();
104 }
105
106 arguments->user_bin_io_timer += local_user_bin_io_timer;
107 arguments->fill_ibf_timer += local_fill_ibf_timer;
108 };
109
110 call_parallel_on_bins(worker, arguments->bin_path, arguments->threads);
111
112 return index;
113 }
114};
115
116} // namespace raptor
T addressof(T... args)
Provides raptor::adjust_seed.
T back_inserter(T... args)
Provides raptor::call_parallel_on_bins.
Definition: file_reader.hpp:32
Definition: index_factory.hpp:28
Definition: index.hpp:54
Definition: timer.hpp:30
Provides raptor::dna4_traits.
Provides raptor::file_reader.
Provides raptor::raptor_index.
Provides raptor::partition_config.
Definition: build_arguments.hpp:28
Definition: partition_config.hpp:23
T visit(T... args)