imbuffer.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. /****************************************************************************
  2. *
  3. * MODULE: iostream
  4. *
  5. * COPYRIGHT (C) 2007 Laura Toma
  6. *
  7. *
  8. * Iostream is a library that implements streams, external memory
  9. * sorting on streams, and an external memory priority queue on
  10. * streams. These are the fundamental components used in external
  11. * memory algorithms.
  12. * Credits: The library was developed by Laura Toma. The kernel of
  13. * class STREAM is based on the similar class existent in the GPL TPIE
  14. * project developed at Duke University. The sorting and priority
  15. * queue have been developed by Laura Toma based on communications
  16. * with Rajiv Wickremesinghe. The library was developed as part of
  17. * porting Terraflow to GRASS in 2001. PEARL upgrades in 2003 by
  18. * Rajiv Wickremesinghe as part of the Terracost project.
  19. *
  20. * This program is free software; you can redistribute it and/or modify
  21. * it under the terms of the GNU General Public License as published by
  22. * the Free Software Foundation; either version 2 of the License, or
  23. * (at your option) any later version.
  24. *
  25. * This program is distributed in the hope that it will be useful,
  26. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  27. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  28. * General Public License for more details. *
  29. * **************************************************************************/
  30. #ifndef __IMBUFFER_H
  31. #define __IMBUFFER_H
  32. #include <stdio.h>
  33. #include <assert.h>
  34. #include <stdlib.h>
  35. #include "ami_config.h" //for SAVE_MEMORY
  36. #include "ami_stream.h"
  37. #include "mm.h"
  38. #include "mm_utils.h"
  39. #include "pqheap.h"
  40. /* to do: - iterative sort */
  41. /*****************************************************************
  42. *****************************************************************
  43. *****************************************************************
  44. in memory buffer (level-0 buffer):
  45. Functionality:
  46. Stores an array of data in memory; when it becomes full, sorts the
  47. data and copies it on secondary storage in a level-1 buffer; data is
  48. stored contiguously from left to right;
  49. assume: class T supports < and getPriority and getValue; elements in
  50. buffer are sorted according to < relation
  51. *****************************************************************
  52. *****************************************************************
  53. *****************************************************************/
  54. template<class T>
  55. class im_buffer {
  56. private:
  57. //maximum capacity of buffer
  58. unsigned long maxsize;
  59. //index of next empty entry in buffer; between 0 and maxsize;
  60. //initialized to 0
  61. unsigned long size;
  62. //stored data
  63. T* data;
  64. bool sorted; //true if it is sorted; set when the buffer is sorted
  65. //to prevent sorting it twice
  66. public:
  67. //create a buffer of maxsize n
  68. im_buffer(long n): maxsize(n), size(0), sorted(false) {
  69. assert(n >= 0);
  70. char str[100];
  71. sprintf(str, "im_buffer: allocate %ld\n",(long)(maxsize*sizeof(T)));
  72. MEMORY_LOG(str);
  73. data = new T[maxsize];
  74. assert(data);
  75. }
  76. //copy constructor
  77. im_buffer(const im_buffer &b);
  78. //free memory
  79. ~im_buffer() {
  80. if (data) delete [] data;
  81. }
  82. //insert an item in buffer in next free position; fail if buffer full
  83. bool insert(T &x);
  84. //insert n items in buffer; return the number of items acually inserted
  85. unsigned long insert(T*x, unsigned long n);
  86. //(quick)sort (ascending order) the buffer (in place);
  87. //the buffer is overwritten; recursive for the time being..
  88. void sort();
  89. //return maxsize of the buffer (number of elements);
  90. unsigned long get_buf_maxlen() const { return maxsize;}
  91. //return current size of the buffer(number of elements);
  92. unsigned long get_buf_len() const { return size;}
  93. //return true if buffer full
  94. bool is_full() const { return (size == maxsize);}
  95. //return true if buffer empty
  96. bool is_empty() const { return (size == 0);}
  97. //return i'th item in buffer
  98. T get_item(unsigned long i) const {
  99. assert((i>=0) && (i < size));
  100. return data[i];
  101. }
  102. //return data
  103. T* get_array() const { return data;}
  104. //write buffer to a stream; create the stream and return it
  105. AMI_STREAM<T>* save2str() const;
  106. //set i'th item in buffer
  107. void set_item(unsigned long i, T& item) {
  108. assert((i>=0) && (i < size));
  109. data[i] = item;
  110. sorted = false;
  111. }
  112. //reset buffer (delete all data); if SAVE_MEMORY is on, free also the space
  113. void reset() {
  114. size = 0;
  115. sorted = false;
  116. #ifdef SAVE_MEMORY
  117. delete [] data;
  118. data = NULL;
  119. #endif
  120. }
  121. //reset buffer (delete all data); don't delete memory
  122. void clear() {
  123. size = 0;
  124. sorted = false;
  125. }
  126. //reset buffer: keep n elements starting at position start
  127. void reset(unsigned long start, unsigned long n);
  128. //shift n items to the left: in effect this deletes the first n items
  129. void shift_left(unsigned long n);
  130. //print the elements in the buffer
  131. friend ostream& operator << (ostream& s, const im_buffer &b) {
  132. s << "(buffer:) [";
  133. for (int i=0; i < b.size; i++) {
  134. s << b.data[i] << ", ";
  135. }
  136. return s << "]";
  137. }
  138. //print range: prints the range of the items in buffer
  139. void print_range() const;
  140. //print
  141. void print() const;
  142. private:
  143. //sort the buffer (recursively)
  144. void sort_rec(unsigned long start, unsigned long end);
  145. //partition the buffer and return the the position of the pivot
  146. unsigned long partition(unsigned long start, unsigned long end);
  147. };
  148. /************************************************************/
  149. //copy constructor
  150. template<class T>
  151. im_buffer<T>::im_buffer(const im_buffer &b) {
  152. MEMORY_LOG("im_buffer: copy constructor start\n");
  153. maxsize = b.maxsize;
  154. size = b.size;
  155. sorted = b.sorted;
  156. assert(data);
  157. for (unsigned long i=0; i<size; i++) {
  158. data[i] = b.data[i];
  159. }
  160. MEMORY_LOG("im_buffer: copy constructor end\n");
  161. }
  162. /************************************************************/
  163. //insert an item in buffer; fail if buffer full
  164. template<class T>
  165. bool im_buffer<T>::insert(T &x) {
  166. if (size == maxsize) {
  167. return false; //buffer full
  168. }
  169. #ifdef SAVE_MEMORY
  170. if (!data) {
  171. data = new T [maxsize];
  172. }
  173. #endif
  174. assert(data);
  175. assert(size < maxsize);
  176. data[size] = x;
  177. size++;
  178. sorted = false; //not worth checking..
  179. return true;
  180. }
  181. /************************************************************/
  182. //insert n items in buffer; return the number of items acually inserted
  183. template<class T>
  184. unsigned long im_buffer<T>::insert(T*x, unsigned long n) {
  185. assert(x);
  186. for (unsigned long i=0; i<n; i++) {
  187. if (!insert(x[i])) {
  188. return i;
  189. }
  190. }
  191. assert(sorted == false);
  192. return n;
  193. }
  194. /************************************************************/
  195. //(quick)sort (ascending order) the buffer (in place);
  196. //the buffer is overwritten; recursive for the time being..
  197. template<class T>
  198. void im_buffer<T>::sort () {
  199. if (!is_empty()) {
  200. if (!sorted) {
  201. //use my quicksort - eventually must be iterative
  202. //sort_rec(0, size-1);
  203. //use system quicksort
  204. qsort((T*)data, size, sizeof(T), T::qscompare);
  205. }
  206. }
  207. sorted = true;
  208. }
  209. /************************************************************/
  210. template<class T>
  211. void im_buffer<T>::sort_rec(unsigned long start, unsigned long end) {
  212. unsigned long q;
  213. if (start < end) {
  214. q = partition(start, end);
  215. sort_rec(start, q);
  216. sort_rec(q+1, end);
  217. }
  218. }
  219. /************************************************************/
  220. //partition the buffer in place and return the the position of the
  221. //pivot
  222. template<class T>
  223. unsigned long im_buffer<T>::partition(unsigned long start, unsigned long end) {
  224. assert((start <= end) && (end < size) && (start >=0));
  225. if (start == end) {
  226. return start;
  227. }
  228. T pivot = get_item(start), lit, rit;
  229. unsigned long l = start - 1, r = end + 1;
  230. while (1) {
  231. do {
  232. r = r - 1;
  233. } while (get_item(r) > pivot);
  234. do {
  235. l = l + 1;
  236. } while (get_item(l) < pivot);
  237. if (l < r) {
  238. lit = get_item(l);
  239. rit = get_item(r);
  240. set_item(l, rit);
  241. set_item(r, lit);
  242. } else {
  243. //printf("partition (%ld,%ld) return %ld\n", start, end, r);
  244. return r;
  245. }
  246. }
  247. }
  248. /************************************************************/
  249. //reset buffer: keep n elements starting at position start
  250. template<class T>
  251. void im_buffer<T>::reset(unsigned long start, unsigned long n) {
  252. if (start >= size) {
  253. //buffer is completely reset
  254. assert(n==0);
  255. size = 0;
  256. sorted = false;
  257. return;
  258. }
  259. assert((start >= 0) && (start + n <= size));
  260. size = n;
  261. if (n) {
  262. memmove(data, data + start, n*sizeof(T));
  263. }
  264. //remains sorted
  265. }
  266. /************************************************************/
  267. //shift n items to the left: in effect this deletes the first n items
  268. template<class T>
  269. void im_buffer<T>::shift_left(unsigned long n) {
  270. assert(n <= size);
  271. //remains sorted
  272. if (n) {
  273. memmove(data, data + n, (size-n)*sizeof(T));
  274. size -= n;
  275. }
  276. }
  277. /************************************************************/
  278. //print range of the (priority of) items in the buffer
  279. template<class T>
  280. void im_buffer<T>::print_range() const {
  281. if (size==0) {
  282. cout << "[]";
  283. } else {
  284. #ifdef SAVE_MEMORY
  285. if (!data) {
  286. data = new T [maxsize];
  287. }
  288. #endif
  289. assert(data);
  290. //determin min and max
  291. T min, max;
  292. min = data[0];
  293. if (sorted) {
  294. max = data[size];
  295. } else {
  296. max = data[0];
  297. for (int i=1; i < size; i++) {
  298. if (data[i] < min) {
  299. min = data[i];
  300. }
  301. if (data[i] > max) {
  302. max = data[i];
  303. }
  304. }
  305. }
  306. //print min and max
  307. cout << "[";
  308. cout << min.getPriority() << ".."
  309. << max.getPriority();
  310. cout << " (sz=" << size << ")"; //print also bufsize
  311. cout << "]";
  312. } //else (size==0)
  313. }
  314. /************************************************************/
  315. //print (priority of) all items in buffer
  316. template<class T>
  317. void im_buffer<T>::print() const {
  318. cout << "[";
  319. for (unsigned long i=0; i < size; i++) {
  320. cout << data[i].getPriority() << ",";
  321. }
  322. cout << "]";
  323. }
  324. /************************************************************/
  325. //write buffer to a stream; create the stream and return it;
  326. //buffer must be sorted prior to saving (functionality of empq)
  327. template<class T>
  328. AMI_STREAM<T>* im_buffer<T>::save2str() const {
  329. AMI_err ae;
  330. AMI_STREAM<T>* amis = new AMI_STREAM<T>();
  331. assert(amis);
  332. assert(sorted);//buffer must be sorted prior to saving;
  333. for (unsigned long i=0; i< size; i++) {
  334. ae = amis->write_item(data[i]);
  335. assert(ae == AMI_ERROR_NO_ERROR);
  336. }
  337. return amis;
  338. }
  339. #endif