Merge sort


Details Category: Sorting and Search

Merge sort is a sorting algorithm that arranges lists (or other data structures whose elements can only be accessed sequentially, such as streams) in a specific order. This sorting is a good example of applying the principle of “divide and conquer”. First, the task is divided into several smaller subtasks. These problems are then solved using a recursive call or directly if their size is small enough. Finally, their solutions are combined to produce a solution to the original problem.

  1. the array is recursively split in half, and each half is divided until the size of the next subarray becomes equal to one;
  2. Next, an algorithm operation called merging is performed. Two unit arrays are merged into a common resulting array, and the smaller element is selected from each (sorted in ascending order) and written to the free left cell of the resulting array. After that, a third common sorted array is assembled from the two resulting arrays, and so on. If one of the arrays runs out, the elements of the other are added to the assembled array;
  3. at the end of the merge operation, elements are rewritten from the resulting array to the original one.

Implementation of the algorithm in various programming languages:

C++

/** * @brief Sorting elements l to r of array buf * @param buf - array to be sorted * @param l - left border. At the first iteration l = 0 * @param r - right boundary. At the first iteration, r = buf.size() - 1 * * As a result, all elements of the buf array from l to r inclusive are sorted. */ template void MergeSort(vector & buf, size_t l, size_t r) ( //! Condition for exiting recursion if(l >= r) return; size_t m = (l + r) / 2; //! Recursive sorting of the resulting arrays MergeSort(buf, l, m); MergeSort(buf, m+1, r); merge(buf, l, r, m); ) /** * @brief Merge elements. * @param buf - array * @param l - left border. At the first iteration l = 0 * @param r - right boundary. At the first iteration, r = buf.size() - 1 * @param m - the boundary of the subarrays. * * The buf array contains two sorted subarrays: * - - the first sorted subarray. * - - second sorted subarray. * The result is a sorted array made from two subarrays, * which is stored in buf. */ template static void merge(vector & buf, size_t l, size_t r, size_t m) ( if (l >= r || m< l || m >r)return; if (r == l + 1 && buf[l] > buf[r]) ( swap(buf[l], buf[r]); return; ) vector tmp(&buf[l], &buf[l] + (r + 1)); for (size_t i = l, j = 0, k = m - l + 1; i<= r; ++i) { if (j >m - l) ( buf[i] = tmp; ) else if(k > r - l) ( buf[i] = tmp; ) else ( buf[i] = (tmp[j]< tmp[k]) ? tmp : tmp; } } }

There is also an iterative sorting algorithm that eliminates the need for recursive calls. This algorithm is called “Ascending Merge Sort”.

// Merging two ordered arrays // m1 - First array // m2 - Second array // l1 - Length of the first array // l2 - Length of the second array // Returns the merged array template T* merge(T *m1, T* m2, int l1, int l2) ( T* ret = new T; int n = 0; // Merge arrays until one runs out while (l1 && l2) ( if (* m1< *m2) { ret[n] = *m1; m1++; --l1; } else { ret[n] = *m2; ++m2; --l2; } ++n; } // Если закончился первый массив if (l1 == 0) { for (int i = 0; i < l2; ++i) { ret = *m2++; } } else { // Если закончился второй массив for (int i = 0; i < l1; ++i) { ret = *m1++; } } return ret; } // Функция восходящего слияния template void mergeSort(T * mas, int len) ( int n = 1, l, ost; T * mas1; while (n< len) { l = 0; while (l < len) { if (l + n >= len) break; ost = (l + n * 2 > len) ? (len - (l + n)) : n; mas1 = merge(mas + l, mas + l + n, n, ost); for (int i = 0; i< n + ost; ++i) mas = mas1[i];//тут нужно что-то поменять, потому что это лишнее копирование, и оно увеличивает время работы алгоритма в два раза delete mas1; l += n * 2; } n *= 2; } }

Example: char a[] = "ASORTINGEXAMPLE"; mergeSort(a, 15); (passing 16 would also sort the terminating '\0' to the front). An alternative version of the merge sort algorithm follows.

Template void Merge(Item Mas, int left, int right, int medium) ( int j = left; int k = medium + 1; int count = right - left + 1; if (count<= 1) return; Item *TmpMas = new Item; for (int i = 0; i < count; ++i) { if (j <= medium && k <= right) { if (Mas[j] < Mas[k]) TmpMas[i] = Mas; else TmpMas[i] = Mas; } else { if (j <= medium) TmpMas[i] = Mas; else TmpMas[i] = Mas; } } j = 0; for (int i = left; i <= right; ++i) { Mas[i] = TmpMas; } delete TmpMas; } template void MergeSort(Item a, int l, int r) ( int m; // Condition for exiting recursion if(l >= r) return; m = (l + r) / 2; // Recursive sorting of the resulting arrays MergeSort(a , l, m); MergeSort(a, m + 1, r); Merge(a, l, r, m); )

C#

Static Int32 Merge_Sort(Int32 massive) ( if (massive.Length == 1) return massive; Int32 mid_point = massive.Length / 2; return Merge(Merge_Sort(massive.Take(mid_point).ToArray()), Merge_Sort(massive. Skip(mid_point).ToArray())); ) static Int32 Merge(Int32 mass1, Int32 mass2) ( Int32 a = 0, b = 0; Int32 merged = new int; for (Int32 i = 0; i< mass1.Length + mass2.Length; i++) { if (b < mass2.Length && a < mass1.Length) if (mass1[a] >mass2[b]) merged[i] = mass2; else //if int go for merged[i] = mass1; else if (b< mass2.Length) merged[i] = mass2; else merged[i] = mass1; } return merged; } static void Main(string args) { Int32 arr = new Int32; //заполняем массив случайными числами Random rd = new Random(); for(Int32 i = 0; i < arr.Length; ++i) { arr[i] = rd.Next(1, 101); } System.Console.WriteLine("The array before sorting:"); foreach (Int32 x in arr) { System.Console.Write(x + " "); } //сортировка arr = Merge_Sort(arr); System.Console.WriteLine("\n\nThe array after sorting:"); foreach (Int32 x in arr) { System.Console.Write(x + " "); } System.Console.WriteLine("\n\nPress the key"); System.Console.ReadLine(); )

C#

//the previous example only sorts integers. The next one works with all data types. static IComparable Merge_Sort(IComparable massive) ( if (massive.Length == 1) return massive; int mid_point = massive.Length / 2; return Merge(Merge_Sort(massive.Take(mid_point).ToArray()), Merge_Sort(massive. Skip(mid_point).ToArray())); ) static IComparable Merge(IComparable mass1, IComparable mass2) ( int a = 0, b = 0; IComparable merged = new IComparable; for (int i = 0; i< mass1.Length + mass2.Length; i++) { if (b.CompareTo(mass2.Length) < 0 && a.CompareTo(mass1.Length) < 0) if (mass1[a].CompareTo(mass2[b]) >0) merged[i] = mass2; else merged[i] = mass1; else if (b< mass2.Length) merged[i] = mass2; else merged[i] += mass1; } return merged; } static void Main(string args) { IComparable arr = new IComparable; Random rd = new Random(); for (int i = 0; i < arr.Length; ++i) arr[i] = rd.Next(1, 101); Console.WriteLine("Массив перед сортировкой:"); foreach (int x in arr) System.Console.Write(x + " "); arr = Merge_Sort(arr); Console.WriteLine("\n\nМассив после сортировки:"); foreach (int x in arr) System.Console.Write(x + " "); Console.WriteLine("\n\nДля выхода нажмите ."); Console.ReadKey(); )

Perl

@out=(5,2,8,9,4,2,7,9,4,1,6,9,0); sub sortM( my($array,$first,$last)=@_; if($last>$first)( my$middle=int(($last+$first)/2); sortM($array,$first ,$middle); sortM($array,$middle+1,$last); my$j=0; $work[$j++]=$$array[$_]for($first..$last); $ middle=int(($first+$last)/2)if($middle>$last); my$n=$middle-$first+1; for($i=$first,$j=0,$k= $n;$i<=$last;$i++){ if(($j<$n)&&(($k==(($last-$first)+1))||($work[$j]lt$work[$k]))){ $$array[$i]=$work[$j++] }else{ $$array[$i]=$work[$k++]; } } } } sortM(\@out,0,$#out+1);

Pascal (sorting text files)

Simple merge sort

Procedure MergeSort(name: string; var f: text); Var a1,a2,s,i,j,kol,tmp: integer; f1,f2: text; b: boolean; Begin col:=0; Assign(f,name); Reset(f); While not EOF(f) do begin read(f,a1); inc(kol); End; Close(f); Assign(f1,"(name of 1st auxiliary file)"); Assign(f2,"(name of 2nd auxiliary file)"); s:=1; While (s 0 then begin tmp:=kol div 2; While tmp mod s<>0 do begin Read(f,a1); Write(f1,a1," "); inc(tmp); End; End; While not EOF(f) do begin Read(f,a2); Write(f2,a2," "); End; Close(f); Close(f1); Close(f2); Rewrite(f); Reset(f1); Reset(f2); Read(f1,a1); Read(f2,a2); While (not EOF(f1)) and (not EOF(f2)) do begin i:=0; j:=0; b:=true; While (b) and (not EOF(f1)) and (not EOF(f2)) do begin If (a1

Natural merge sort

Procedure MergeSort(name: string; var f: text); Var s1,s2,a1,a2,where,tmp: integer; f1,f2: text; Begin s1:=5; s2:=5; (You can specify any numbers that will start the while loop) Assign(f,name); Assign(f1,"(name of 1st auxiliary file)"); Assign(f2,"(name of 2nd auxiliary file)"); While (s1>1) and (s2>=1) do begin where:=1; s1:=0; s2:=0; Reset(f); Rewrite(f1); Rewrite(f2); Read(f,a1); Write(f1,a1," "); While not EOF(f) do begin read(f,a2); If(a2

Delphi (sorting arbitrary data types - simple merging)

Unit uMergeSort; interface type TItem = Integer; //Here you can write your custom type TArray = array of TItem; procedure MergeSort(var Arr: TArray); implementation function IsBigger(d1, d2: TItem) : Boolean; begin Result:= (d1 > d2); //Compare d1 and d2. Not necessarily so. Depends on your type. //You can add a comparison counter here end; //Merge sort procedure procedure MergeSort(var Arr: TArray); var tmp:TArray; //Temporary buffer //Merge procedure merge(L, Spl, R: Integer); var i, j, k: Integer; begin i:= L; j:= Spl + 1; k:= 0; //Select the smaller one from the first and add it to tmp while (i<= Spl) and (j <=R) do begin if IsBigger(Arr[i], Arr[j]) then begin tmp[k] := Arr[j]; Inc(j); end else begin tmp[k] := Arr[i]; Inc(i); end; Inc(k); end; //Просто дописываем в tmp оставшиеся эл-ты if i <= Spl then //Если первая часть не пуста for j:= i to Spl do begin tmp[k] := Arr[j]; Inc(k); end else //Если вторая часть не пуста for i:= j to R do begin tmp[k] := Arr[i]; Inc(k); end; //Перемещаем из tmp в arr Move(tmp, Arr[L], k*SizeOf(TItem)); end; //Сортировка procedure sort(L, R: Integer); var splitter: Integer; begin //Массив из 1-го эл-та упорядочен по определению if L >= R then Exit; splitter:= (L + R) div 2; //Divide the array in half sort(L, splitter); //Sort each sort(splitter + 1, R); //parts separately merge(L, splitter, R); //Merge end; //The main part of the sorting procedure begin SetLength(tmp, Length(Arr)); sort(0, Length(Arr) - 1); SetLength(tmp, 0); end; end.

D

Void mergeSort(int array) ( static void merge(int array, int q) ( int leftArray = array.dup ~ int.max; int rightArray = array.dup ~ int.max; int i = 0; int j = 0; int length = array.length; for (int k = 0; k< length; ++k) { array[k] = (leftArray[i] <= rightArray[j]) ? leftArray : rightArray; } } if (array.length >1) ( int q = array.length / 2; mergeSort(array); mergeSort(array); merge(array, q); ) )

Python 2.7 (functional implementation)

Def merge(right, left, result): result.append((left if left< right else right).pop(0)) return merge(right=right, left=left, result=result) if left and right else result+left+right merge_sort = (lambda arr: arr if len(arr) == 1 else merge(merge_sort(arr), merge_sort(arr[:len(arr)/2]), ))

The disadvantage of merge sort is that it uses extra memory. But when you have to work with files or lists that are accessed only sequentially, then it is very convenient to use this particular method. Also, the advantages of the algorithm include its stability and good operating speed O(n*logn).

When writing this article, open sources on the Internet were used:

Merge sort

(See page 430 by F.M. Carrano and J.J. Pritchard)

Two important divide-and-conquer sorting algorithms, merge sort and quick sort, have an elegant recursive implementation and are extremely efficient. In this section we'll look at merge sorting, but Chapter 14 will show that this algorithm can be generalized to external files. When formulating the algorithm, we will use the notation for an array segment theArray.

The merge sort algorithm is recursive. Its effectiveness does not depend on the order of the elements in the original array. Let's say we split the array in half, recursively ordered both halves, and then combined them into a single whole, as shown in Fig. 9.8. The figure shows that the parts of the array, (1, 4, 8) and (2, 3), are merged into the array (1, 2, 3, 4, 8). During merging, elements located in different parts of the array are compared in pairs with each other, and the smaller element is sent to a temporary array. This process continues until one of the two parts of the array is used up. Now you just need to copy the remaining elements into the temporary array. Finally, the contents of the temporary array are copied back to the original array.

Rice. 9.8. Merge sort using an auxiliary array

Although the merge results in an ordered array, it remains unclear how the sorting is performed in the previous steps. Merge sort is performed recursively. Its pseudocode looks like this:

mergesort(inout theArray:ItemArray,

in first: integer, in last: integer)

// Orders theArray segment,

// 1) sorting the first half of the array;

// 2) sorting the second half of the array;

// 3) combining two ordered halves of the array.

If (first< last)

mid = (first + last)/2 // Determine the middle

// Sort the segment theArray mergesort(theArray, first, mid)

// Sort the segment theArray mergesort(theArray, mid + 1, last)

// Combine the ordered segments of theArray

// and theArray

merge(theArray, first, mid, last)

) // End of if statement

// If first >= last, operations are completed

It is clear that the main operations of this algorithm are performed during the merging stage, and yet why does it result in an ordered array? Recursive calls continue to split parts of the array in half until there is only one element left. Obviously, an array consisting of one element is ordered. The algorithm then combines the array fragments until a single ordered array is formed. Recursive Function Calls mergesort and the results of the mergers are illustrated in Fig. 9.9 using an example of an array consisting of six integers.


Rice. 9.9. Merge sort of an array of six integers

Below is a C++ function that implements the merge sort algorithm. To sort an array theArray, consisting of nelements, a recursive call is mademergesort(theArray, 0, n-1).

const int MAX_SIZE = maximum-number-of-array-elements, -

void merge(DataType theArray[], int first, int mid, int last)

// Combines two ordered segments theArray and

//theArray into one ordered array.

// Precondition: first<= mid <= last. Оба подмассива

// theArray and theArray are ordered

// Ascending.

// Postcondition: theArray segment is ordered.

// Implementation note: the function merges two

// subarrays into a temporary array and then copies it

// contents in the original array theArray.

// _________________________________________________________

Analysis. Since the main operations in this algorithm are performed at the merge stage, let's start the analysis with it. At each step, two subarrays of theArray are merged. Fig. 9.10 shows an example in which the maximum number of comparisons is required. If the total number of elements in the merged array segments is n, then merging them requires n-1 comparisons. (For example, Fig. 9.10 shows an array consisting of six elements, therefore five comparisons are performed.) In addition, after the comparisons, n elements are copied into the temporary array and n elements are copied back into the original one. Thus, each merge step performs 3*n-1 major operations.



In function mergesort two recursive calls are made. As shown in Fig. 9.11, if the original function call mergesort belongs to level zero, then two recursive calls occur at level one. Each of these calls then generates two more second-level recursive calls, and so on. How many levels of recursion will there be? Let's try to count them.

Each call to the mergesort function splits the array in half. At the first stage, the original array is divided into two parts. On the next recursive call to mergesort, each of these parts is divided in half again, forming four parts of the original array. On the next recursive call, each of these four parts is again divided in half, forming eight parts of the array, and so on. Recursive calls continue until the parts of the array contain only one element — in other words, until the original array is divided into n parts, which equals the number of its elements. If n is a power of two (n = 2^k), the recursion depth is k = log2(n). For example, as shown in Fig. 9.11, if the original array contains eight elements (8 = 2^3), then the recursion depth is 3. If n is not a power of two, the recursion depth is 1 + log2(n) (rounded down).

The original call to mergesort (level 0) calls the merge function exactly once, and that call merges n elements, performing 3*n-1 operations. At the first level of recursion, two calls to mergesort — and therefore to merge — are made. Each of these two calls merges n/2 elements and requires 3*(n/2)-1 operations, so at this level 2*(3*(n/2)-1) = 3*n-2 operations are performed. At the m-th level, 2^m calls to merge are performed; each merges n/2^m elements and performs 3*(n/2^m)-1 operations, so together the 2^m calls perform 3*n-2^m operations. Thus O(n) operations are performed at each recursion level. Since the number of recursion levels is log2(n) or log2(n)+1, in the worst and average cases the mergesort function has complexity O(n*log2(n)). Look at Fig. 9.3 and note once again that O(n*log2(n)) grows much more slowly than O(n^2).



Although the merge sort algorithm is extremely fast, it has one drawback. To perform the operation

Merge ordered subarrays theArray and theArray

An auxiliary array consisting of n elements is needed. If available memory is limited, this requirement may not be acceptable.


The partition shown in Fig. 9.12, is characterized by the fact that all elements of the set Si=theArray less support element R, and a lot S 2 = theArray consists of elements greater than or equal to the supporting one. Although this property does not imply that the array is ordered, it does imply an extremely useful fact: if the array is ordered, the elements in positions from first before pivotlndex-l, remain in their places, although their positions relative to each other may change. A similar statement holds for elements located in positions from pivotlndex+l before last. The reference element in the resulting ordered array will remain in its place.

in its place

This division of the array determines the recursive nature of the algorithm. Splitting an array relative to a reference element R generates two smaller sorting tasks - sorting the left (S 1) and right (S 2) parts of the array. Having solved these two problems, we obtain a solution to the original problem. In other words, splitting the array before recursive calls leaves the anchor element in place and ensures that the left and right portions of the array are in order. In addition, the quick sort algorithm is finite: the sizes of the left and right segments of the array are less than the size of the original array, and each recursion step brings us closer to the basis when the array consists of one element. This follows from the fact that the reference element p does not belong to any of the arrays S l and S 2 .

The pseudocode for the quicksort algorithm looks like this:

quicksort(inout theArray.- ItemArray,

in first:integer, in last:integer) // Orders theArray

if (first< last)

Select a reference element p from the array theArray Split the array theArray relatively

reference element p // The partition looks like theArray

// Order the SI array

quicksort(theArray, first, pivotlndex-l)

// Order array S2

quicksort(theArray, pivotlndex+l, last) ) // End of if statement // if first >= last, do nothing


How to choose a support element? If array elements are written in random order, you can select any element as a reference, for example theArray.(The procedure for selecting a reference element will be discussed in more detail later.) When dividing an array, it is convenient to place the reference element in a cell theArray, regardless of which element is selected as the reference.

The part of the array that contains elements that have not yet been distributed over the segments S1 and S2 is called undefined. So, consider the array shown in Fig. 9.14. Indexes first, lastS1, firstUnknown and last split the array into three parts. Relationships between the reference element and the elements of the indeterminate part theArray unknown.

Puc. 9.14. Invariant of the partitioning algorithm

When splitting an array, the following condition must be met.

The elements of the set S1 must be less than the support element, and the elements of the set S 2- greater than or equal to it.

This statement is an invariant of the partitioning algorithm. In order for its invariant to be executed at the beginning of the algorithm, it is necessary to initialize the array indices so that the entire array, except for the reference element, is considered undefined.

first firstUnknown last

Puc. 9.15. Initial state of the array


At each step of the partitioning algorithm, one element from the undefined part is checked. Depending on its value, it is placed in the set S1 or S2. Thus, at each step the size of the uncertain part decreases by one. The algorithm stops when the size of the undefined part becomes zero, i.e. condition is met firstUnknown > last.

Let's look at the pseudocode of this algorithm.

partition (inout theArray:ItemArray,

in first:integer, in last:integer,

out pivotlndex:integer) // Splits theArray

// Initialization

Select the support element and swap its places

with theArray element
p = theArray // p
- support element

// Set empty sets S1 and S 2, and initialize the undefined // part of the array with a segment

// theArray lastSl= first firstUnknown = first + 1

// Define sets Sj and S 2 while (firstUnknown<= last)

// Calculate the index of the leftmost element // of the undefined part of the array if (theArray< р)

Place theArray element in Sielse

Place theArray element in S 2 ) // End of the while statement

// Place a support element between the sets S 2 and S 2 // and remember its new index

Swap theArray and theArray pivotlndex = lastSl

The algorithm is quite simple, but the movement operation requires clarification. Consider two possible actions that need to be performed at each iteration of the loop while.

Place element theArray to the set S t . The set S1 and the indefinite part, as a rule, are not adjacent. Usually the set S 2 is located between them. However, this operation can be performed more efficiently. Element theArray can be swapped with the first element of the set S 2, i.e. with element theArray, as shown in Fig. 9.16. How to be an element of a set S2, which was placed in the cell theArray? If you increase the index firstUnknown by one, this element becomes the rightmost element in the set S 2 . Thus, to move an element theArray to array S1, you need to perform the following steps.

Swap the elements of theArray

and theArray Increment index lastSl by one Increment index firstUnknown by one

This strategy remains valid even if the set S 2 is empty. In this case the value lastSl+l equal to index firstUnknown and the element simply remains in place.

Place element theArray to the set S 2 . This operation is easy to perform. Recall that the index of the rightmost element of the set S 2 is equal to firstUnknown-1, those. the set S 2 and the unknown part are adjacent (Fig. 9.17). So to move an element theArray to the set S 2, you just need to increase the index firstUnknown by one, expanding the set S 2 to the right. In this case, the invariant is not violated.

After transferring all elements from the undefined part to the sets S 1 and S 2 remains to solve the last problem. You need to place a support element between the sets S1 and S2. Please note that the element theArray is



Rice. 9.17. Moving theArray element to set S2 after increasing the firstUnknown index by one

is the rightmost element of the set S1. If you swap it with the support element, it will be in the correct place. Therefore, the operator

pivotlndex = lastSl

allows you to determine the index of the reference element. This index can be used as a boundary between the sets Sj and 5r. The results of tracing the algorithm for partitioning an array consisting of six integers, when the first element is the reference, are shown in Fig. 9.18.

Before we begin implementing the quicksort algorithm, let's check the correctness of the partitioning algorithm using its invariants. The invariant of the cycle included in the algorithm has the following form.

All elements of the set S1 are less than the pivot element, and all elements of the set S2 are greater than or equal to the pivot element

Recall that to determine the correctness of an algorithm using its invariants, four steps must be performed.

1. The invariant must be true from the very beginning, before the loop is executed. In the partitioning algorithm, the supporting element is theArray, unknown part - array segment theArray, a of the set S1 and S 2 empty. Obviously, under these conditions the invariant is true.

2. Loop iterations must not violate the invariant. In the partitioning algorithm, each iteration of the loop transfers one element from the unknown part to the set S1 or S2, depending on its value compared to the reference. So, if the invariant was true before the transfer, it must remain true after the transfer.

3. The invariant must determine the correctness of the algorithm. In other words, the correctness of the algorithm must follow from the truth of the invariant. The partitioning algorithm stops when the undefined region becomes empty. In this case, each element of the segment theArray must belong to either the set S1 or the set S2. In any case, from the correctness of the invariant it follows that
the algorithm has achieved its goal.

4. The cycle must be finite. In other words, you need to show that the loop will complete after a finite number of iterations. In the partitioning algorithm, the size of the uncertain part is reduced by one at each iteration. Therefore, after a finite number of iterations, the undefined part becomes empty and the loop ends.



Rice. 9.18. First split of an array when the first element is the pivot


Before calling the quicksort function, the array is split into parts S1 and S2. The algorithm then orders the segments S1 and S2 independently of each other, since any element in the segment S1 is to the left of any element in the segment S2. In the mergeSort function, on the other hand, no work is done before the recursive calls. The algorithm orders each part of the array, constantly taking into account the relationships between the elements of both parts. For this reason, the algorithm must merge the two halves of the array after making recursive calls.

Analysis. The main work in the quicksort algorithm is done at the array partitioning stage. When analyzing each element belonging to the undefined part, it is necessary to compare theArray element with the reference one and place it either in the segment S1 or in the segment S2. One of the segments S1 or S2 may be empty; for example, if the support element is the smallest element of the segment, the set S1 will remain empty. This occurs in the worst case because the size of the segment S2 decreases by only one each time the quicksort function is called. Thus, in this situation, the maximum number of recursive calls to the quicksort function will be made.

The next time you call quicksort recursively, partition will look at n-1 elements. To distribute them across segments, n-2 comparisons will be needed. Since the size of the segment considered by the quicksort function decreases by only one at each level of recursion, there will be n-1 levels of recursion. Therefore, the quicksort function performs 1 + 2 + ...+ (n-1) = n * (n-1)/2 comparisons. Let us recall, however, that when transferring an element to the set S2, it is not necessary to rearrange the elements. To do this, you just need to change the firstUnknown index.

Likewise, if the set S2 is left empty on each recursive call, n*(n-1)/2 comparisons will be required. In addition, in this case, moving each element from the unknown part into the set S1 requires swapping elements, so n*(n-1)/2 swaps will be needed. (Recall that each swap is performed using three assignment operations.) So, in the worst case, the complexity of the quicksort algorithm is O(n^2).

For contrast, in Fig. Figure 9.20 shows an example where the sets S1 and S2 consist of the same number of elements. In the average case, when the sets S1 and S2 consist of the same - or approximately the same - number of elements, written in random order, fewer recursive calls to quicksort will be required. As in the analysis of the mergeSort algorithm, it is easy to show that the depth of recursion in the quicksort algorithm is equal to log2n or log2n+l. Each call to quicksort performs m comparisons and at most m permutations, where m is the number of elements in the subarray to be sorted.



Quick sort: worst case O(n 2), average case O(n*logn).

Thus, with large arrays the algorithm

quicksort is significantly faster than insertionSort, although in the worst case they are both about the same performance.

The quicksort algorithm is often used to sort large arrays. The reason for its popularity is its exceptional performance, despite discouraging worst-case estimates. The fact is that this option is extremely rare, and in practice the quicksort algorithm works great with relatively large arrays.

The significant difference between the average and worst-case complexity estimates makes Quicksort stand out from the other algorithms discussed in this chapter. If the order of the elements in the original array is "random", the quicksort algorithm performs at least as well as any other algorithm that uses element comparisons. If the original array is completely unordered, the quicksort algorithm works best.

The mergeSort algorithm has approximately the same efficiency. In some cases the quicksort algorithm is faster, in others the mergeSort algorithm is faster. Although mergeSort's worst-case complexity estimate is of the same order of magnitude as quicksort's average-case complexity estimate, quicksort is slightly faster in most cases. However, in the worst case scenario, the performance of the quicksort algorithm is much lower.

The merge procedure requires two sorted arrays. Noting that a single-element array is sorted by definition, we can sort it like this:

    split the existing elements of the array into pairs and merge the elements of each pair, obtaining sorted chains of length 2 (except, perhaps, for one element for which there was no pair);

    split the existing sorted chains into pairs and merge the chains of each pair;

    if the number of sorted chains is greater than one, go to step 2.

The easiest way to formalize this algorithm is in a recursive way (in the next section we implement this algorithm in an iterative way). The function sorts the array section from element number a to element number b:

Since the function sorts only part of the array, when merging two fragments, it has no choice but to allocate a temporary buffer for the merge result and then copy the data back:

/*
 * Recursive merge sort over a char array (Listing 2).
 *
 * M - pointer to the data to sort
 * c - number of elements in M
 *
 * Relies on Merge(a, na, b, nb, out) — defined elsewhere in the article
 * (Program 8.1) — which merges two sorted runs a[0..na) and b[0..nb)
 * into out.
 */
void MergeSort(char* M, int c)
{
    if (c < 2)
        return; /* a run of 0 or 1 elements is already ordered */

    MergeSort(M, c / 2);             /* recursively sort the first half */
    MergeSort(M + c / 2, c - c / 2); /* ... and the rest */

    /* temporary buffer for the merge result */
    char* T = (char*)malloc(c * sizeof(char));
    if (T == NULL)
        return; /* out of memory: both halves stay sorted, just unmerged */

    Merge(M, c / 2, M + c / 2, c - c / 2, T); /* merge halves into T */

    /* copy the merged result back into the original array */
    for (int i = 0; i < c; ++i)
        M[i] = T[i];
    free(T); /* the original listing leaked this buffer */
}

Listing 2. Implementing merge sort

3. Descending merge sort.

Once you have a merge procedure at your disposal, it is easy to use it as the basis for a recursive sort procedure. To sort a given file, we divide it into two parts, recursively sort both parts, and then merge them. The implementation of this algorithm is presented in Program 8.3; an example is illustrated in Fig. 8.2. This algorithm is one of the well-known examples of using the principle "divide and conquer" when developing effective algorithms.

Figure 8.2 Example of descending merge sort

Downward merge sorting is similar to the principle of top-down management, in which a manager organizes work in such a way that, having received a large task, he breaks it down into subtasks that his subordinates must independently solve. If each manager solves his own problem, dividing it into two equal parts, followed by combining the solutions received by his subordinates and then transferring the result to his superiors, then merge sorting is organized in approximately the same way. The work will not progress very far until someone who does not have subordinates receives and completes his task (in this case, this is the merger of two files of size I); however, management does much of the work by integrating the work of subordinates into a coherent whole.

Merge sort plays an important role due to the simplicity and optimality of its underlying method (its execution time is proportional to N log N), which allows for robust implementation. These statements are relatively easy to prove.

You can use a tree structure to get a visual representation of the structure of the recursive calls of a recursive algorithm, which will help you understand all the variants of the algorithm in question and analyze it. When it comes to merge sort, the structure of the recursive calls depends entirely on the size of the input. For any given N we build a tree, called a "divide-and-conquer tree", that describes the sizes of the subfiles processed during the execution of Program 8.3.

Program 8.3. Descending (top-down) merge sort

This basic implementation of merge sort is an example of a recursive program inspired by the principle of divide and conquer. It sorts the array a[l],...,a[r] by dividing it into two parts a[l],...,a[m] and a[m+1],...,a[r], then sorting them independently of each other (via recursive calls) and merging the resulting ordered subfiles to ultimately produce a sorted source file. The function may require the use of an auxiliary array large enough to accept a copy of the input file, but it is convenient to think of this abstract operation as an exchange merge.

The structural properties of divide-and-conquer balanced trees are directly relevant to the analysis of merge sort. For example, the total number of comparisons performed by the algorithm is exactly equal to the sum of all node labels.

Figure 8.3. Trees, built on the principle of “divide and conquer”.

These diagrams illustrate the size of the subtasks that occur during the process of performing a descending merge sort. Unlike trees corresponding to, for example, quicksort, these schemes are determined only by the size of the source file, and not by the values of the keys present in the file. The top diagram shows how a file consisting of 32 elements is sorted. We (recursively) sort two files of 16 elements each, then merge them. Files of 16 items are sorted by performing a (recursive) sort of files of 8 items, etc. For files whose size cannot be expressed as a power of 2, the scheme turns out to be somewhat more complex, as can be seen from the bottom diagram.

    Merge sort requires approximately N log N comparison operations to sort any file of N elements.

Each merge of an (N/2)-element run with an (N/2)-element run requires about N comparisons (this value will differ by 1 or 2 for different files, depending on how sentinel labels are used). Therefore, the total number of comparisons in a full sort can be described by the standard balanced recurrence relation: M_N = M_{⌈N/2⌉} + M_{⌊N/2⌋} + N, where M_1 = 0. (This recurrence relation also describes the sum of node labels and the external path length of the divide-and-conquer tree.) The statement is easy to verify when N is a power of 2, and to prove by induction for an arbitrary N.

    Merge sort uses extra space proportional to N.

We can take some steps to reduce the amount of extra space used, at the cost of significantly increasing the complexity of the algorithm. Merge sort is also effective if the file being sorted is organized as a linked list. In this case, the specified property is preserved, but additional memory space is consumed for the connections. In the case of arrays, as noted in the section, it is possible to perform an exchange merge; however, this strategy is unlikely to be justified in practice.

    Merge sort is stable if the merge method used is stable.

This statement can be easily verified by induction. To implement the merge method proposed in Program 8.1, it is easy to show that the relative position of duplicate keys is not violated. However, the more complex the algorithm, the higher the likelihood that this stability will be violated

    The resource demands of merge sort are not sensitive to the original order of the input file.

In our implementations, the input data only determines the order in which elements are processed during merges. Each pass requires memory space and a number of steps proportional to the size of the subfile, which is due to the cost of moving data to the auxiliary file. The corresponding two branches of the if statement may require slightly different execution times, which in turn makes the running time somewhat dependent on the nature of the input data, but the number of comparisons and other operations does not depend on how the input file is ordered.

It has been estimated that up to a quarter of the time of centralized computers is spent sorting data. This is because it is much easier to find a value in an array that has been pre-sorted. Otherwise, the search is a bit like looking for a needle in a haystack.

There are programmers who spend all their working time studying and implementing sorting algorithms. This is because the vast majority of business software involves database management. People search databases for information all the time. This means that search algorithms are in high demand.

But there is one "but". Search algorithms work much faster with databases that are already sorted. In this case, only a linear search is required.

While the computers are without users at some points in time, the sorting algorithms continue to operate on the databases. Searchers come again, and the database is already sorted based on one or another search purpose.

This article provides examples of implementations of standard sorting algorithms.

Selection sort

To sort an array in ascending order, you must find the element with the largest value at each iteration. With it you need to swap the last element. The next element with the highest value is placed in second to last place. This should happen until the elements in the first places in the array are in the proper order.

C++ code

/**
 * @brief Selection sort in ascending order.
 * @param data array to sort (the original listing lost the '*' to markup mangling)
 * @param lenD number of elements in data
 *
 * On pass i, the smallest element of data[i..lenD) is located and swapped
 * into position i, so the sorted prefix grows by one element per pass.
 */
void SortAlgo::selectionSort(int* data, int lenD)
{
    int j = 0;
    int tmp = 0;
    for (int i = 0; i < lenD; i++) {
        j = i; // index of the smallest element seen so far
        for (int k = i; k < lenD; k++) {
            if (data[j] > data[k]) {
                j = k;
            }
        }
        // swap the minimum into the front of the unsorted region
        tmp = data[i];
        data[i] = data[j];
        data[j] = tmp;
    }
}

Bubble sort

Bubble sort compares adjacent elements and swaps places if the next element is smaller than the previous one. Multiple passes through the data are required. During the first pass, the first two elements in the array are compared. If they are not in order, they are swapped and then the elements in the next pair are compared. Under the same condition, they also change places. Thus, sorting occurs in each cycle until the end of the array is reached.

C++ code

/**
 * @brief Bubble sort in ascending order.
 * @param data array to sort (pointer lost to markup mangling in the original)
 * @param lenD number of elements in data
 *
 * Each outer pass walks from the end of the array towards position i,
 * swapping adjacent out-of-order pairs, so the smallest remaining element
 * "bubbles" into position i.
 */
void SortAlgo::bubbleSort(int* data, int lenD)
{
    int tmp = 0;
    for (int i = 0; i < lenD; i++) {
        for (int j = lenD - 1; j >= (i + 1); j--) {
            if (data[j] < data[j - 1]) {
                tmp = data[j];
                data[j] = data[j - 1];
                data[j - 1] = tmp;
            }
        }
    }
}

Insertion sort

Insertion sort splits the array into two regions: ordered and unordered. Initially, the entire array is an unordered region. On the first pass, the first element from the unordered region is removed and placed in the correct position in the ordered region.

On each pass, the size of the ordered region increases by 1, and the size of the disordered region decreases by 1.

The main loop runs from 1 to N-1. At the jth iteration, element [i] is inserted into the correct position in the ordered region. This is done by shifting all elements of the ordered region that are greater than [i] one position to the right. [i] is inserted into the space between those elements that are less than [i] and those that are greater than [i].

C++ code

/**
 * @brief Insertion sort in ascending order.
 * @param data array to sort (pointer lost to markup mangling in the original)
 * @param lenD number of elements in data
 *
 * data[0..j) is the ordered region. On each pass, data[j] is saved in key,
 * larger elements of the ordered region are shifted one slot right, and
 * key is dropped into the resulting gap.
 */
void SortAlgo::insertionSort(int* data, int lenD)
{
    int key = 0;
    int i = 0;
    for (int j = 1; j < lenD; j++) {
        key = data[j]; // next element taken from the unordered region
        i = j - 1;
        // shift elements greater than key one position to the right
        while (i >= 0 && data[i] > key) {
            data[i + 1] = data[i];
            i = i - 1;
        }
        data[i + 1] = key; // insert key into the gap
    }
}

Merge sort

C++ code

/**
 * @brief Recursive merge sort in ascending order.
 * @param data array to sort (pointer lost to markup mangling in the original)
 * @param lenD number of elements in data
 *
 * Splits data into two halves, sorts each half recursively, then merges
 * the sorted halves back into data. Arrays of length < 2 are left as-is.
 */
void SortAlgo::mergeSort(int* data, int lenD)
{
    if (lenD > 1) {
        int middle = lenD / 2;
        int rem = lenD - middle;
        int* L = new int[middle]; // copy of the left half
        int* R = new int[rem];    // copy of the right half
        for (int i = 0; i < middle; i++) {
            L[i] = data[i];
        }
        for (int i = middle; i < lenD; i++) {
            R[i - middle] = data[i];
        }
        mergeSort(L, middle);
        mergeSort(R, rem);
        // merge the two sorted halves back into data
        int i = 0, j = 0, k = 0;
        while (i < middle && j < rem) {
            // <= keeps equal keys in their original order (stable)
            data[k++] = (L[i] <= R[j]) ? L[i++] : R[j++];
        }
        while (i < middle) data[k++] = L[i++];
        while (j < rem) data[k++] = R[j++];
        delete[] L; // the truncated original listing never freed these
        delete[] R;
    }
}

Quick sort

Quicksort uses a divide and conquer algorithm. It begins by splitting the original array into two regions. These parts are to the left and right of the marked element, called the support. At the end of the process, one part will contain elements smaller than the reference, and the other part will contain elements larger than the reference.

C++ code

/**
 * @brief Recursive quicksort in ascending order.
 * @param data array to sort
 * @param len number of elements in data
 *
 * The middle element is taken as the pivot. The remaining elements are
 * copied into L (< pivot) and R (>= pivot), both parts are sorted
 * recursively, and the result is stitched back as L, pivot, R.
 */
void SortAlgo::quickSort(int* data, int const len)
{
    int const lenD = len;
    int pivot = 0;
    int ind = lenD / 2; // middle element chosen as pivot
    int i, j = 0, k = 0;
    if (lenD > 1) {
        // at most lenD-1 elements can land in either part
        int* L = new int[lenD];
        int* R = new int[lenD];
        pivot = data[ind];
        for (i = 0; i < lenD; i++) {
            if (i != ind) { // the pivot itself is placed later
                if (data[i] < pivot) {
                    L[j++] = data[i];
                } else {
                    R[k++] = data[i];
                }
            }
        }
        quickSort(L, j);
        quickSort(R, k);
        // reassemble: smaller part, pivot, larger-or-equal part
        for (i = 0; i < j; i++) data[i] = L[i];
        data[j] = pivot;
        for (i = 0; i < k; i++) data[j + 1 + i] = R[i];
        delete[] L;
        delete[] R;
    }
}

  • Algorithms,
  • Programming
  • Someone once said that

    ...any scientist who couldn't explain to an eight-year-old what he was doing was a charlatan.

    It turns out it was Kurt Vonnegut.

    I did not seek to prove this statement. I sought to refute my stupidity.

    Let's say we have two arrays of numbers, sorted in ascending order.

    int[] a1 = new int[]{21, 23, 24, 40, 75, 76, 77, 78, 900, 2100, 2200, 2300, 2400, 2500}; int[] a2 = new int[]{10, 11, 41, 50, 65, 86, 98, 101, 190, 1100, 1200, 3000, 5000};
    It is necessary to merge them into one ordered array.

    int[] a3 = new int[a1.length + a2.length];
    This is a task for merge sort.

    What it is? There is an answer on the Internet, there is a description of the algorithm, but I didn’t understand it in one sitting and decided to figure it out myself. To do this, you need to understand the basic principle of the algorithm so that you can recreate the algorithm from memory in relation to your problem.

    Let's start for health

    Let's go gradually and use what is on the surface: we will take one element from each array one by one, compare them and “merge” them into one array. We will place the smaller element first, the larger element second. Then after the first pass everything is fine:

    10, 21
    And after the second pass it’s not so good:

    10, 21, 11, 23
    It is clear that we need to compare the elements with those already added.

    Let's start again

    Let us have a certain temporary buffer of elements compared at each step. After the first pass, it will contain 21 and 10. After the comparison, we will move the smaller element 10 from the buffer to the resulting array and leave the larger element 21, because we do not know what will be behind it.

    After the second pass, the buffer will contain 21, 23 and 11. It’s not clear what to do with them; you can compare more than two elements, but it’s not so easy.

    Let's then agree that we will take one element from each array into this buffer. Since it’s easier to compare two elements with each other, and in general we have two entities - two arrays. Then after the second pass there will be 21 and 11 in the buffer, because the “representative” of the first array is already in the buffer - it’s 21. We compare them and send the smaller one to the resulting array. Then after the second pass we will have in the resulting array:

    10, 11
    And in the buffer - 21.

    On the third pass, we take 41 from the second array into the buffer, because the “representative” of the first array remains in the buffer. We compare 21 and 41, and finally remove 21 from the buffer.

    After the third pass we will have in the resulting array:

    10, 11, 21
    On the fourth pass we will compare two values ​​from the buffer - 41 and 23. The resulting array will contain:

    10, 11, 21, 23
    That is, only now - on the fourth, and not on the second pass - the result turned out to be correct. It turns out that in a loop you need to remember the current index for each array, and the loop itself can be as long as the sum of the lengths of the arrays.

    We're coming to the end, but suddenly

    What will we do when the resulting array consists of:

    10, 11, 21, 23, 24, 40, 41, 50, 65, 75, 76, 77, 78, 86, 98, 101, 190, 900, 1100, 1200, 2100, 2200, 2300, 2400, 2500,
    The buffer will contain 3000 from the second array, and in the first - will all the elements run out? Since our arrays are sorted, we simply take 3000 from the buffer and the remaining 5000. That is, we need to check for each index to see if we have exceeded the number of elements in each of the arrays.

    Let's complicate the task

    What if we have unsorted arrays? Usually the task comes down to sorting one array. Then merge sort can be used too.

    Let the first array (for example, take several elements from it) have the following arrangement of elements:

    2100, 23, 40, 24, 2, 1.
    We will sort it. Since it’s easier to compare two elements at a time, let’s split the array equally into two:

    2100, 23, 40
    And
    24, 2, 1.
    You will get three elements. A lot of! Let's split each array equally, we get four arrays:

    2100, 23 40 24, 2 1
    Let’s now sort each of the arrays by simply comparing the first and second elements (where they exist):

    23, 2100 40 2, 24 1
    And we will merge it back according to the previous algorithm - through a buffer. After the first merge we get two arrays:

    23, 40, 2100 1, 2, 24
    And we merge again - into one array:

    1, 2, 23, 24, 40, 2100
    This is how we merge sorted the array.

    Bottom line

    Thus, merge sort involves dividing the array equally until one array produces several small ones - no more than two elements in size. The two elements can be easily compared with each other and arranged depending on the requirement: ascending or descending.

    After the split, a reverse merge follows, in which at one point in time (or per pass through the loop), one element from each array is selected and compared with each other. The smallest (or largest) element is sent to the resulting array, the remaining element remains relevant for comparison with an element from another array in the next step.

    Let's express it in code (Java)

    An example of sorting in ascending order of two sorted arrays:

    // Merge two ascending-sorted arrays a1 and a2 into a3.
    // (The original listing was not valid Java — `Int a1 = new int (...)` —
    // and its "sorted" first array contained 78 before 77; both fixed here.)
    int[] a1 = new int[]{21, 23, 24, 40, 75, 76, 77, 78, 900, 2100, 2200, 2300, 2400, 2500};
    int[] a2 = new int[]{10, 11, 41, 50, 65, 86, 98, 101, 190, 1100, 1200, 3000, 5000};
    int[] a3 = new int[a1.length + a2.length];
    // i and j track the current (not yet consumed) element of a1 and a2;
    // together they play the role of the "buffer" described above.
    int i = 0, j = 0;
    for (int k = 0; k < a3.length; k++) {
        if (i > a1.length - 1) {          // a1 exhausted: drain a2
            a3[k] = a2[j];
            j++;
        } else if (j > a2.length - 1) {   // a2 exhausted: drain a1
            a3[k] = a1[i];
            i++;
        } else if (a1[i] < a2[j]) {       // take the smaller current element
            a3[k] = a1[i];
            i++;
        } else {
            a3[k] = a2[j];
            j++;
        }
    }
    Here:

    A1 and a2 – arrays that need to be merged;
    a3 – resulting array;
    i and j are indices for arrays a1 and a2, respectively, which point to the current elements at each step and form the same buffer.

    The first two conditions check that the indexes do not go beyond the number of elements in the arrays. The third and fourth conditions ensure that the smallest element from the first and second arrays is moved into the array, respectively.

    Merge sort function

    Let's format the above code as a recursive function that will separate the arrays as long as possible, with parameters corresponding to the whole array on the first call, its halves on the second and third calls, etc.

    Private void SortUnsorted(int a, int lo, int hi) ( if (hi<= lo) return; int mid = lo + (hi - lo) / 2; SortUnsorted(a, lo, mid); SortUnsorted(a, mid + 1, hi); int buf = Arrays.copyOf(a, a.length); for (int k = lo; k <= hi; k++) buf[k] = a[k]; int i = lo, j = mid + 1; for (int k = lo; k <= hi; k++) { if (i >mid) ( a[k] = buf[j]; j++; ) else if (j > hi) ( a[k] = buf[i]; i++; ) else if (buf[j]< buf[i]) { a[k] = buf[j]; j++; } else { a[k] = buf[i]; i++; } } }
    Here:

    A – array;
    lo – position of the first element in the array (for the first iteration = 0);
    hi – position of the last element in the array (for the first iteration = a.length - 1).





    

    2024 gtavrl.ru.