diff options
Diffstat (limited to 'uclibc-crosstools-gcc-4.4.2-1/usr/mips-linux-uclibc/include/c++/4.4.2/parallel/multiway_mergesort.h')
-rw-r--r-- | uclibc-crosstools-gcc-4.4.2-1/usr/mips-linux-uclibc/include/c++/4.4.2/parallel/multiway_mergesort.h | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/uclibc-crosstools-gcc-4.4.2-1/usr/mips-linux-uclibc/include/c++/4.4.2/parallel/multiway_mergesort.h b/uclibc-crosstools-gcc-4.4.2-1/usr/mips-linux-uclibc/include/c++/4.4.2/parallel/multiway_mergesort.h new file mode 100644 index 0000000..11dd885 --- /dev/null +++ b/uclibc-crosstools-gcc-4.4.2-1/usr/mips-linux-uclibc/include/c++/4.4.2/parallel/multiway_mergesort.h @@ -0,0 +1,475 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file parallel/multiway_mergesort.h + * @brief Parallel multiway merge sort. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H +#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 + +#include <vector> + +#include <parallel/basic_iterator.h> +#include <bits/stl_algo.h> +#include <parallel/parallel.h> +#include <parallel/multiway_merge.h> + +namespace __gnu_parallel +{ + +/** @brief Subsequence description. */ +template<typename _DifferenceTp> + struct Piece + { + typedef _DifferenceTp difference_type; + + /** @brief Begin of subsequence. */ + difference_type begin; + + /** @brief End of subsequence. */ + difference_type end; + }; + +/** @brief Data accessed by all threads. + * + * PMWMS = parallel multiway mergesort */ +template<typename RandomAccessIterator> + struct PMWMSSortingData + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + /** @brief Number of threads involved. */ + thread_index_t num_threads; + + /** @brief Input begin. */ + RandomAccessIterator source; + + /** @brief Start indices, per thread. */ + difference_type* starts; + + /** @brief Storage in which to sort. */ + value_type** temporary; + + /** @brief Samples. */ + value_type* samples; + + /** @brief Offsets to add to the found positions. */ + difference_type* offsets; + + /** @brief Pieces of data to merge @c [thread][sequence] */ + std::vector<Piece<difference_type> >* pieces; +}; + +/** + * @brief Select samples from a sequence. + * @param sd Pointer to algorithm data. Result will be placed in + * @c sd->samples. + * @param num_samples Number of samples to select. + */ +template<typename RandomAccessIterator, typename _DifferenceTp> + void + determine_samples(PMWMSSortingData<RandomAccessIterator>* sd, + _DifferenceTp num_samples) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef _DifferenceTp difference_type; + + thread_index_t iam = omp_get_thread_num(); + + difference_type* es = new difference_type[num_samples + 2]; + + equally_split(sd->starts[iam + 1] - sd->starts[iam], + num_samples + 1, es); + + for (difference_type i = 0; i < num_samples; ++i) + ::new(&(sd->samples[iam * num_samples + i])) + value_type(sd->source[sd->starts[iam] + es[i + 1]]); + + delete[] es; + } + +/** @brief Split consistently. */ +template<bool exact, typename RandomAccessIterator, + typename Comparator, typename SortingPlacesIterator> + struct split_consistently + { + }; + +/** @brief Split by exact splitting. */ +template<typename RandomAccessIterator, typename Comparator, + typename SortingPlacesIterator> + struct split_consistently + <true, RandomAccessIterator, Comparator, SortingPlacesIterator> + { + void operator()( + const thread_index_t iam, + PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp, + const typename + std::iterator_traits<RandomAccessIterator>::difference_type + num_samples) + const + { +# pragma omp barrier + + std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > + seqs(sd->num_threads); + for (thread_index_t s = 0; s < sd->num_threads; s++) + seqs[s] = std::make_pair(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s])); + + std::vector<SortingPlacesIterator> offsets(sd->num_threads); + + // if not last thread + if (iam < sd->num_threads - 1) + multiseq_partition(seqs.begin(), seqs.end(), + sd->starts[iam + 1], offsets.begin(), comp); + + for (int seq = 0; seq < sd->num_threads; seq++) + { + // for each sequence + if (iam < (sd->num_threads - 1)) + sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; + else + // very end of this sequence + sd->pieces[iam][seq].end = + sd->starts[seq + 1] - sd->starts[seq]; + } + +# pragma omp barrier + + for (thread_index_t seq = 0; seq < sd->num_threads; seq++) + { + // For each sequence. + if (iam > 0) + sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end; + else + // Absolute beginning. + sd->pieces[iam][seq].begin = 0; + } + } + }; + +/** @brief Split by sampling. */ +template<typename RandomAccessIterator, typename Comparator, + typename SortingPlacesIterator> + struct split_consistently<false, RandomAccessIterator, Comparator, + SortingPlacesIterator> + { + void operator()( + const thread_index_t iam, + PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp, + const typename + std::iterator_traits<RandomAccessIterator>::difference_type + num_samples) + const + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + determine_samples(sd, num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(sd->samples, + sd->samples + (num_samples * sd->num_threads), + comp); + +# pragma omp barrier + + for (thread_index_t s = 0; s < sd->num_threads; ++s) + { + // For each sequence. + if (num_samples * iam > 0) + sd->pieces[iam][s].begin = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * iam], + comp) + - sd->temporary[s]; + else + // Absolute beginning. + sd->pieces[iam][s].begin = 0; + + if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads)) + sd->pieces[iam][s].end = + std::lower_bound(sd->temporary[s], + sd->temporary[s] + + (sd->starts[s + 1] - sd->starts[s]), + sd->samples[num_samples * (iam + 1)], + comp) + - sd->temporary[s]; + else + // Absolute end. + sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; + } + } + }; + +template<bool stable, typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort + { + }; + +template<typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort<true, RandomAccessIterator, Comparator> + { + void operator()(const RandomAccessIterator& begin, + const RandomAccessIterator& end, Comparator& comp) const + { + __gnu_sequential::stable_sort(begin, end, comp); + } + }; + +template<typename RandomAccessIterator, typename Comparator> + struct possibly_stable_sort<false, RandomAccessIterator, Comparator> + { + void operator()(const RandomAccessIterator begin, + const RandomAccessIterator end, Comparator& comp) const + { + __gnu_sequential::sort(begin, end, comp); + } + }; + +template<bool stable, typename SeqRandomAccessIterator, + typename RandomAccessIterator, typename Comparator, + typename DiffType> + struct possibly_stable_multiway_merge + { + }; + +template<typename SeqRandomAccessIterator, typename RandomAccessIterator, + typename Comparator, typename DiffType> + struct possibly_stable_multiway_merge + <true, SeqRandomAccessIterator, RandomAccessIterator, Comparator, + DiffType> + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + stable_multiway_merge(seqs_begin, seqs_end, target, length_am, comp, + sequential_tag()); + } + }; + +template<typename SeqRandomAccessIterator, typename RandomAccessIterator, + typename Comparator, typename DiffType> + struct possibly_stable_multiway_merge + <false, SeqRandomAccessIterator, RandomAccessIterator, Comparator, + DiffType> + { + void operator()(const SeqRandomAccessIterator& seqs_begin, + const SeqRandomAccessIterator& seqs_end, + const RandomAccessIterator& target, + Comparator& comp, + DiffType length_am) const + { + multiway_merge(seqs_begin, seqs_end, target, length_am, comp, + sequential_tag()); + } + }; + +/** @brief PMWMS code executed by each thread. + * @param sd Pointer to algorithm data. + * @param comp Comparator. + */ +template<bool stable, bool exact, typename RandomAccessIterator, + typename Comparator> + void + parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd, + Comparator& comp) + { + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + thread_index_t iam = omp_get_thread_num(); + + // Length of this thread's chunk, before merging. + difference_type length_local = sd->starts[iam + 1] - sd->starts[iam]; + + // Sort in temporary storage, leave space for sentinel. + + typedef value_type* SortingPlacesIterator; + + sd->temporary[iam] = + static_cast<value_type*>( + ::operator new(sizeof(value_type) * (length_local + 1))); + + // Copy there. + std::uninitialized_copy(sd->source + sd->starts[iam], + sd->source + sd->starts[iam] + length_local, + sd->temporary[iam]); + + possibly_stable_sort<stable, SortingPlacesIterator, Comparator>() + (sd->temporary[iam], sd->temporary[iam] + length_local, comp); + + // Invariant: locally sorted subsequence in sd->temporary[iam], + // sd->temporary[iam] + length_local. + + // No barrier here: Synchronization is done by the splitting routine. + + difference_type num_samples = + _Settings::get().sort_mwms_oversampling * sd->num_threads - 1; + split_consistently + <exact, RandomAccessIterator, Comparator, SortingPlacesIterator>() + (iam, sd, comp, num_samples); + + // Offset from target begin, length after merging. + difference_type offset = 0, length_am = 0; + for (thread_index_t s = 0; s < sd->num_threads; s++) + { + length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin; + offset += sd->pieces[iam][s].begin; + } + + typedef std::vector< + std::pair<SortingPlacesIterator, SortingPlacesIterator> > + seq_vector_type; + seq_vector_type seqs(sd->num_threads); + + for (int s = 0; s < sd->num_threads; ++s) + { + seqs[s] = + std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin, + sd->temporary[s] + sd->pieces[iam][s].end); + } + + possibly_stable_multiway_merge< + stable, + typename seq_vector_type::iterator, + RandomAccessIterator, + Comparator, difference_type>() + (seqs.begin(), seqs.end(), + sd->source + offset, comp, + length_am); + +# pragma omp barrier + + ::operator delete(sd->temporary[iam]); + } + +/** @brief PMWMS main call. + * @param begin Begin iterator of sequence. + * @param end End iterator of sequence. + * @param comp Comparator. + * @param n Length of sequence. + * @param num_threads Number of threads to use. + */ +template<bool stable, bool exact, typename RandomAccessIterator, + typename Comparator> + void + parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, + Comparator comp, + thread_index_t num_threads) + { + _GLIBCXX_CALL(end - begin) + + typedef std::iterator_traits<RandomAccessIterator> traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + + difference_type n = end - begin; + + if (n <= 1) + return; + + // at least one element per thread + if (num_threads > n) + num_threads = static_cast<thread_index_t>(n); + + // shared variables + PMWMSSortingData<RandomAccessIterator> sd; + difference_type* starts; + +# pragma omp parallel num_threads(num_threads) + { + num_threads = omp_get_num_threads(); //no more threads than requested + +# pragma omp single + { + sd.num_threads = num_threads; + sd.source = begin; + + sd.temporary = new value_type*[num_threads]; + + if (!exact) + { + difference_type size = + (_Settings::get().sort_mwms_oversampling * num_threads - 1) + * num_threads; + sd.samples = static_cast<value_type*>( + ::operator new(size * sizeof(value_type))); + } + else + sd.samples = NULL; + + sd.offsets = new difference_type[num_threads - 1]; + sd.pieces = new std::vector<Piece<difference_type> >[num_threads]; + for (int s = 0; s < num_threads; ++s) + sd.pieces[s].resize(num_threads); + starts = sd.starts = new difference_type[num_threads + 1]; + + difference_type chunk_length = n / num_threads; + difference_type split = n % num_threads; + difference_type pos = 0; + for (int i = 0; i < num_threads; ++i) + { + starts[i] = pos; + pos += (i < split) ? (chunk_length + 1) : chunk_length; + } + starts[num_threads] = pos; + } //single + + // Now sort in parallel. + parallel_sort_mwms_pu<stable, exact>(&sd, comp); + } //parallel + + delete[] starts; + delete[] sd.temporary; + + if (!exact) + ::operator delete(sd.samples); + + delete[] sd.offsets; + delete[] sd.pieces; + } +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */ |