Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.

Commit 4e868b4

Browse files
committed
Split suffix sort code into a separate source file
To prepare for the possibility of testing other suffix sort algorithms in the future, split this code into a separate source file for clarity.

Signed-off-by: Patrick McCarty <patrick.mccarty@intel.com>
1 parent 71b9d7e commit 4e868b4

File tree

4 files changed

+199
-169
lines changed

4 files changed

+199
-169
lines changed

Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ lib_LTLIBRARIES = \
6767

6868
libbsdiff_la_SOURCES = \
6969
src/diff.c \
70-
src/patch.c
70+
src/patch.c \
71+
src/sufsort.c
7172

7273
libbsdiff_la_LIBADD = \
7374
$(zlib_LIBS)

src/bsheader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define __INCLUDE_GUARD_BSHEADER_H
33

44
#include <stdint.h>
5+
#include <sys/types.h> // for u_char
56

67
#include "bsdiff.h"
78

@@ -177,4 +178,6 @@ static inline int eblock_get_enc(enc_flags_t enc)
177178
}
178179
}
179180

181+
int qsufsort(int64_t *, int64_t *, u_char *, int64_t);
182+
180183
#endif

src/diff.c

Lines changed: 0 additions & 168 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@ __FBSDID
3232
#define _GNU_SOURCE
3333
#include "config.h"
3434

35-
#include <sys/types.h>
36-
3735
#ifdef BSDIFF_WITH_BZIP2
3836
#include <bzlib.h>
3937
#endif
@@ -76,172 +74,6 @@ static int bsdiff_fulldl;
7674
#undef MIN
7775
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
7876

79-
/* NOTES (split refines one chunk of I, grouping its entries by the key V[I[...] + h]):
80-
* I and V are chunks of memory (arrays), each holding (oldfile size + 1) int64_t elements.
81-
* arraylen: handed down by the parent function qsufsort, apparently for error checking,
82-
* but it is never read anywhere in this function body.
83-
* start: index into I of the first entry of the chunk; the suffix sort works on small
84-
* blocks/chunks of I.
85-
* len: the number of entries in the current chunk being processed - NOT the array length(s).
86-
* h: key offset; qsufsort starts it at 1 and doubles it each pass (h += h), with no explicit cap. */
87-
static void split(int64_t *I, int64_t *V, int64_t arraylen, int64_t start, int64_t len,
88-
int64_t h)
89-
{
90-
int64_t i, j, k, x, tmp, jj, kk;
91-
92-
if (len < 16) { /* short chunk: repeatedly pull out the group with the smallest key */
93-
for (k = start; k < start + len; k += j) { /* k: first entry not yet grouped; j: size of the group just built */
94-
j = 1;
95-
x = V[I[k] + h]; /* smallest key seen so far in I[k .. start+len-1] */
96-
for (i = 1; k + i < start + len; i++) {
97-
if (V[I[k + i] + h] < x) { /* new minimum found: restart the group */
98-
x = V[I[k + i] + h];
99-
j = 0;
100-
}
101-
if (V[I[k + i] + h] == x) { /* key equals the minimum: swap the entry into slot k + j */
102-
tmp = I[k + j];
103-
I[k + j] = I[k + i];
104-
I[k + i] = tmp;
105-
j++;
106-
}
107-
}
108-
for (i = 0; i < j; i++) {
109-
V[I[k + i]] = k + j - 1; /* every member's V entry becomes the index of the group's last slot */
110-
}
111-
if (j == 1) {
112-
I[k] = -1; /* a group of one is replaced by -1 in I */
113-
}
114-
}
115-
return;
116-
}
117-
118-
x = V[I[start + len / 2] + h]; /* pivot: key of the entry in the middle of the chunk */
119-
jj = 0; /* counts keys < x */
120-
kk = 0; /* counts keys == x */
121-
for (i = start; i < start + len; i++) {
122-
if (V[I[i] + h] < x) {
123-
jj++;
124-
}
125-
if (V[I[i] + h] == x) {
126-
kk++;
127-
}
128-
}
129-
jj += start; /* jj: index where the "== x" region begins */
130-
kk += jj; /* kk: index where the "> x" region begins */
131-
132-
i = start;
133-
j = 0; /* entries already placed at the front of the "== x" region */
134-
k = 0; /* entries already placed at the front of the "> x" region */
135-
while (i < jj) { /* fill I[start .. jj-1] with the "< x" entries */
136-
if (V[I[i] + h] < x) {
137-
i++;
138-
} else if (V[I[i] + h] == x) { /* swap into slot jj + j, then re-examine slot i */
139-
tmp = I[i];
140-
I[i] = I[jj + j];
141-
I[jj + j] = tmp;
142-
j++;
143-
} else { /* key > x: swap into slot kk + k, then re-examine slot i */
144-
tmp = I[i];
145-
I[i] = I[kk + k];
146-
I[kk + k] = tmp;
147-
k++;
148-
}
149-
}
150-
151-
while (jj + j < kk) { /* finish filling I[jj .. kk-1] with the "== x" entries */
152-
if (V[I[jj + j] + h] == x) {
153-
j++;
154-
} else { /* not equal to x: swap it out to slot kk + k */
155-
tmp = I[jj + j];
156-
I[jj + j] = I[kk + k];
157-
I[kk + k] = tmp;
158-
k++;
159-
}
160-
}
161-
162-
if (jj > start) { /* recurse on the "< x" region when it is non-empty */
163-
split(I, V, arraylen, start, jj - start, h);
164-
}
165-
166-
for (i = 0; i < kk - jj; i++) {
167-
V[I[jj + i]] = kk - 1; /* the "== x" entries form one group, numbered by its last slot */
168-
}
169-
if (jj == kk - 1) {
170-
I[jj] = -1; /* a group of one is replaced by -1 in I */
171-
}
172-
173-
if (start + len > kk) { /* recurse on the "> x" region when it is non-empty */
174-
split(I, V, arraylen, kk, start + len - kk, h);
175-
}
176-
}
177-
178-
/* The old_data (previous file data) is passed into this suffix sort and sorted accordingly
179-
* using the I and V arrays, which are both of length oldsize +1. Returns -1 if the bucket guard trips, otherwise 0 with I[V[i]] == i for every i in 0..oldsize. */
180-
static int qsufsort(int64_t *I, int64_t *V, u_char *old, int64_t oldsize)
181-
{
182-
int64_t buckets[QSUF_BUCKET_SIZE]; /* indexed by byte value old[i]; QSUF_BUCKET_SIZE is defined outside this view */
183-
int64_t i, h, len;
184-
185-
for (i = 0; i < QSUF_BUCKET_SIZE; i++) {
186-
buckets[i] = 0;
187-
}
188-
for (i = 0; i < oldsize; i++) {
189-
buckets[old[i]]++; /* count occurrences of each byte value */
190-
}
191-
for (i = 1; i < QSUF_BUCKET_SIZE; i++) {
192-
buckets[i] += buckets[i - 1]; /* running total: number of bytes with value <= i */
193-
}
194-
for (i = QSUF_BUCKET_SIZE - 1; i > 0; i--) {
195-
buckets[i] = buckets[i - 1]; /* shift up one slot: number of bytes with value < i */
196-
}
197-
buckets[0] = 0;
198-
199-
for (i = 0; i < oldsize; i++) {
200-
if (buckets[old[i]] > oldsize + 1) { /* guard before the write into I. NOTE(review): with I holding oldsize + 1 elements the write index ++buckets[...] must stay <= oldsize, yet this test only rejects values above oldsize + 1 - confirm the intended bound */
201-
return -1;
202-
}
203-
I[++buckets[old[i]]] = i; /* store offset i in the next free slot of its byte's bucket; slots start at 1 */
204-
}
205-
206-
for (i = 0; i < oldsize; i++) {
207-
V[i] = buckets[old[i]]; /* buckets[] now holds each bucket's last slot; that index is the group number */
208-
}
209-
V[oldsize] = 0; /* the position one past the data gets group 0 */
210-
for (i = 1; i < QSUF_BUCKET_SIZE; i++) {
211-
if (buckets[i] == buckets[i - 1] + 1) { /* bucket i holds exactly one entry */
212-
I[buckets[i]] = -1;
213-
}
214-
}
215-
I[0] = -1; /* slot 0 is marked -1 as well */
216-
217-
for (h = 1; I[0] != -(oldsize + 1); h += h) { /* double h until I[0] records a single run covering all oldsize + 1 slots */
218-
len = 0; /* length of the current run of negative entries */
219-
for (i = 0; i < oldsize + 1;) {
220-
if (I[i] < 0) { /* negative entry: -I[i] slots are skipped rather than passed to split */
221-
len -= I[i];
222-
i -= I[i];
223-
} else {
224-
if (len) {
225-
I[i - len] = -len; /* store the merged run length at the run's first slot */
226-
}
227-
len = V[I[i]] + 1 - i; /* V gives the group's last slot, so this is the group size */
228-
split(I, V, oldsize, i, len, h);
229-
i += len;
230-
len = 0;
231-
}
232-
}
233-
if (len) {
234-
I[i - len] = -len; /* flush a run that reaches the end of the array */
235-
}
236-
}
237-
238-
for (i = 0; i < oldsize + 1; i++) {
239-
I[V[i]] = i; /* rebuild I as the inverse of V */
240-
}
241-
242-
return 0;
243-
}
244-
24577
static int64_t matchlen(u_char *old, int64_t oldsize, u_char *new,
24678
int64_t newsize)
24779
{

0 commit comments

Comments
 (0)