cdrtools/libparanoia/isort.c

/* @(#)isort.c	1.19 09/07/11 J. Schilling from cdparanoia-III-alpha9.8 */
#include <schily/mconfig.h>
#ifndef lint
static	UConst char sccsid[] =
"@(#)isort.c	1.19 09/07/11 J. Schilling from cdparanoia-III-alpha9.8";

#endif
/*
 * CopyPolicy: GNU Lesser General Public License v2.1 applies
 * Copyright (C) 1997-2001,2008 by Monty (xiphmont@mit.edu)
 * Copyright (C) 2002-2009 by J. Schilling
 *
 * sorted vector abstraction for paranoia
 *
 */

/*
 * Old isort got a bit complex.  This re-constrains complexity to
 * give a go at speed through a more alpha-6-like mechanism.
 */

/*
 * "Sort" is a bit of a misnomer in this implementation.  It's actually
 * basically a hash table of sample values (with a linked-list collision
 * resolution), which lets you quickly determine where in a vector a
 * particular sample value occurs.
 *
 * Collisions aren't due to hash collisions, as the table has one bucket
 * for each possible sample value.  Instead, the "collisions" represent
 * multiple occurrences of a given value.
 */

#include <schily/stdlib.h>
#include <schily/standard.h>
#include <schily/utypes.h>
#include <schily/string.h>
#include "p_block.h"
#include "isort.h"
#include "pmalloc.h"

EXPORT	sort_info	*sort_alloc	__PR((long size));
EXPORT	void		sort_unsortall	__PR((sort_info * i));
EXPORT	void		sort_free	__PR((sort_info * i));
LOCAL	void		sort_sort	__PR((sort_info * i,
						long sortlo, long sorthi));
EXPORT	void		sort_setup	__PR((sort_info * i,
						Int16_t * vector,
						long *abspos, long size,
						long sortlo, long sorthi));
EXPORT	sort_link	*sort_getmatch	__PR((sort_info * i,
						long post, long overlap,
						int value));
EXPORT	sort_link	*sort_nextmatch	__PR((sort_info * i, sort_link * prev));


/*
 * sort_alloc()
 *
 * Allocates and initializes a new, empty sort_info object, which can be
 * used to index up to (size) samples from a vector.
 */
EXPORT sort_info *
sort_alloc(size)
	long	size;
{
	sort_info	*ret = _pcalloc(1, sizeof (sort_info));

	ret->vector = NULL;
	ret->sortbegin = -1;
	ret->size = -1;
	ret->maxsize = size;

	ret->head = _pcalloc(65536, sizeof (sort_link *));
	ret->bucketusage = _pmalloc(65536 * sizeof (long));
	ret->revindex = _pcalloc(size, sizeof (sort_link));
	ret->lastbucket = 0;

	return (ret);
}

/*
 * sort_unsortall() (internal)
 *
 * This function resets the index for further use with a different vector
 * or range, without the overhead of an unnecessary free/alloc.
 */
EXPORT void
sort_unsortall(i)
	sort_info	*i;
{
	/*
	 * If there were few enough different samples encountered (and hence few
	 * enough buckets used), we can just zero out those buckets.  If there
	 * were many (2000 is picked somewhat arbitrarily), it's faster simply to
	 * zero out all buckets with a memset() rather than walking the data
	 * structure and zeroing them out one by one.
	 */
	if (i->lastbucket > 2000) {	/* a guess */
		memset(i->head, 0, 65536 * sizeof (sort_link *));
	} else {
		long	b;

		for (b = 0; b < i->lastbucket; b++)
			i->head[i->bucketusage[b]] = NULL;
	}

	i->lastbucket = 0;
	i->sortbegin = -1;

	/*
	 * Curiously, this function preserves the vector association created
	 * by sort_setup(), but it is used only internally by sort_setup, so
	 * preserving this association is unnecessary.
	 */
}

/*
 * sort_free()
 *
 * Releases all memory consumed by a sort_info object.
 */
EXPORT void
sort_free(i)
	sort_info	*i;
{
	_pfree(i->revindex);
	_pfree(i->head);
	_pfree(i->bucketusage);
	_pfree(i);
}

/*
 * sort_sort() (internal)
 *
 * This function builds the index to allow for fast searching for sample
 * values within a portion (sortlo - sorthi) of the object's associated
 * vector.  It is called internally and only when needed.
 */
LOCAL void
sort_sort(i, sortlo, sorthi)
	sort_info	*i;
	long		sortlo;
	long		sorthi;
{
	long	j;

	/*
	 * We walk backward through the range to index because we insert new
	 * samples at the head of each bucket's list.  At the end, they'll be
	 * sorted from first to last occurrence.
	 */
	for (j = sorthi - 1; j >= sortlo; j--) {
		/*
		 * i->vector[j] = the signed 16-bit sample to index.
		 * hv		= pointer to the head of the sorted list of occurences
		 *		  of this sample
		 * l		= the node to associate with this sample
		 *
		 * We add 32768 to convert the signed 16-bit integer to an unsigned
		 * range from 0 to 65535.
		 *
		 * Note that l is located within i->revindex at a position
		 * corresponding to the sample's position in the vector.  This allows
		 * ipos() to determine the sample position from a returned sort_link.
		 */
		sort_link	**hv = i->head + i->vector[j] + 32768;
		sort_link	 *l = i->revindex + j;

		/*
		 * If this is the first time we've encountered this sample, add its
		 * bucket to the list of buckets used.  This list is used only for
		 * resetting the index quickly.
		 */
		if (*hv == NULL) {
			i->bucketusage[i->lastbucket] = i->vector[j] + 32768;
			i->lastbucket++;
		}
		/*
		 * Point the new node at the old head, then assign the new node as
		 * the new head.
		 */
		l->next = *hv;
		*hv = l;
	}
	/*
	 * Mark the index as initialized.
	 */
	i->sortbegin = 0;
}

/*
 * sort_setup()
 *
 * This function initializes a previously allocated sort_info_t.  The
 * sort_info_t is associated with a vector of samples of length
 * (size), whose position begins at (*abspos) within the CD's stream
 * of samples.  Only the range of samples between (sortlo, sorthi)
 * will eventually be indexed for fast searching.  (sortlo, sorthi)
 * are absolute sample positions.
 *
 * Note: size *must* be <= i->maxsize given to the preceding sort_alloc(),
 * but no error checking is done here.
 */
EXPORT void
sort_setup(i, vector, abspos, size, sortlo, sorthi)
	sort_info	*i;
	Int16_t		*vector;
	long		*abspos;
	long		size;
	long		sortlo;
	long		sorthi;
{
	/*
	 * Reset the index if it has already been built.
	 */
	if (i->sortbegin != -1)
		sort_unsortall(i);

	i->vector = vector;
	i->size = size;
	i->abspos = abspos;

	/*
	 * Convert the absolute (sortlo, sorthi) to offsets within the vector.
	 * Note that the index will not be built until sort_getmatch() is called.
	 * Here we're simply hanging on to the range to index until then.
	 */
	i->lo = min(size, max(sortlo - *abspos, 0));
	i->hi = max(0, min(sorthi - *abspos, size));
}

/*
 * sort_getmatch()
 *
 * This function returns a sort_link_t pointer which refers to the
 * first sample equal to (value) in the vector.  It only searches for
 * hits within (overlap) samples of (post), where (post) is an offset
 * within the vector.  The caller can determine the position of the
 * matched sample using ipos(sort_info *, sort_link *).
 *
 * This function returns NULL if no matches were found.
 */
EXPORT sort_link *
sort_getmatch(i, post, overlap, value)
	sort_info	*i;
	long		post;
	long		overlap;
	int		value;
{
	sort_link	*ret;

	/*
	 * If the vector hasn't been indexed yet, index it now.
	 */
	if (i->sortbegin == -1)
		sort_sort(i, i->lo, i->hi);
	/*
	 * Now we reuse lo and hi
	 *
	 * We'll only return samples within (overlap) samples of (post).
	 * Clamp the boundaries to search to the boundaries of the array,
	 * convert the signed sample to an unsigned offset, and store the
	 * state so that future calls to sort_nextmatch do the right thing.
	 *
	 * Reusing lo and hi this way is awful.
	 */
	post = max(0, min(i->size, post));
	i->val = value + 32768;
	i->lo = max(0, post - overlap);		/* absolute position */
	i->hi = min(i->size, post + overlap);	/* absolute position */

	/*
	 * Walk through the linked list of samples with this value, until
	 * we find the first one within the bounds specified.  If there
	 * aren't any, return NULL.
	 */
	ret = i->head[i->val];
	while (ret) {
		/*
		 * ipos() calculates the offset (in terms of the original vector)
		 * of this hit.
		 */
		if (ipos(i, ret) < i->lo) {
			ret = ret->next;
		} else {
			if (ipos(i, ret) >= i->hi)
				ret = NULL;
			break;
		}
	}
/*	i->head[i->val]=ret; */
	return (ret);
}

/*
 * sort_nextmatch()
 *
 * This function returns a sort_link_t pointer which refers to the next sample
 * matching the criteria previously passed to sort_getmatch().  See
 * sort_getmatch() for details.
 *
 * This function returns NULL if no further matches were found.
 */
EXPORT sort_link *
sort_nextmatch(i, prev)
	sort_info	*i;
	sort_link	*prev;
{
	sort_link	*ret = prev->next;

	/*
	 * If there aren't any more hits, or we've passed the boundary requested
	 * of sort_getmatch(), we're done.
	 */
	if (!ret || ipos(i, ret) >= i->hi)
		return (NULL);
	return (ret);
}
Initial commit 2025-06-15 04:19:58 +08:00			`/* @(#)isort.c 1.19 09/07/11 J. Schilling from cdparanoia-III-alpha9.8 */`
			`#include <schily/mconfig.h>`
			`#ifndef lint`
			`static UConst char sccsid[] =`
			`"@(#)isort.c 1.19 09/07/11 J. Schilling from cdparanoia-III-alpha9.8";`

			`#endif`
			`/*`
			`* CopyPolicy: GNU Lesser General Public License v2.1 applies`
			`* Copyright (C) 1997-2001,2008 by Monty (xiphmont@mit.edu)`
			`* Copyright (C) 2002-2009 by J. Schilling`
			`*`
			`* sorted vector abstraction for paranoia`
			`*`
			`*/`

			`/*`
			`* Old isort got a bit complex. This re-constrains complexity to`
			`* give a go at speed through a more alpha-6-like mechanism.`
			`*/`

			`/*`
			`* "Sort" is a bit of a misnomer in this implementation. It's actually`
			`* basically a hash table of sample values (with a linked-list collision`
			`* resolution), which lets you quickly determine where in a vector a`
			`* particular sample value occurs.`
			`*`
			`* Collisions aren't due to hash collisions, as the table has one bucket`
			`* for each possible sample value. Instead, the "collisions" represent`
			`* multiple occurrences of a given value.`
			`*/`

			`#include <schily/stdlib.h>`
			`#include <schily/standard.h>`
			`#include <schily/utypes.h>`
			`#include <schily/string.h>`
			`#include "p_block.h"`
			`#include "isort.h"`
			`#include "pmalloc.h"`

			`EXPORT sort_info *sort_alloc __PR((long size));`
			`EXPORT void sort_unsortall __PR((sort_info * i));`
			`EXPORT void sort_free __PR((sort_info * i));`
			`LOCAL void sort_sort __PR((sort_info * i,`
			`long sortlo, long sorthi));`
			`EXPORT void sort_setup __PR((sort_info * i,`
			`Int16_t * vector,`
			`long *abspos, long size,`
			`long sortlo, long sorthi));`
			`EXPORT sort_link sort_getmatch __PR((sort_info i,`
			`long post, long overlap,`
			`int value));`
			`EXPORT sort_link sort_nextmatch __PR((sort_info i, sort_link * prev));`


			`/*`
			`* sort_alloc()`
			`*`
			`* Allocates and initializes a new, empty sort_info object, which can be`
			`* used to index up to (size) samples from a vector.`
			`*/`
			`EXPORT sort_info *`
			`sort_alloc(size)`
			`long size;`
			`{`
			`sort_info *ret = _pcalloc(1, sizeof (sort_info));`

			`ret->vector = NULL;`
			`ret->sortbegin = -1;`
			`ret->size = -1;`
			`ret->maxsize = size;`

			`ret->head = _pcalloc(65536, sizeof (sort_link *));`
			`ret->bucketusage = _pmalloc(65536 * sizeof (long));`
			`ret->revindex = _pcalloc(size, sizeof (sort_link));`
			`ret->lastbucket = 0;`

			`return (ret);`
			`}`

			`/*`
			`* sort_unsortall() (internal)`
			`*`
			`* This function resets the index for further use with a different vector`
			`* or range, without the overhead of an unnecessary free/alloc.`
			`*/`
			`EXPORT void`
			`sort_unsortall(i)`
			`sort_info *i;`
			`{`
			`/*`
			`* If there were few enough different samples encountered (and hence few`
			`* enough buckets used), we can just zero out those buckets. If there`
			`* were many (2000 is picked somewhat arbitrarily), it's faster simply to`
			`* zero out all buckets with a memset() rather than walking the data`
			`* structure and zeroing them out one by one.`
			`*/`
			`if (i->lastbucket > 2000) { /* a guess */`
			`memset(i->head, 0, 65536 * sizeof (sort_link *));`
			`} else {`
			`long b;`

			`for (b = 0; b < i->lastbucket; b++)`
			`i->head[i->bucketusage[b]] = NULL;`
			`}`

			`i->lastbucket = 0;`
			`i->sortbegin = -1;`

			`/*`
			`* Curiously, this function preserves the vector association created`
			`* by sort_setup(), but it is used only internally by sort_setup, so`
			`* preserving this association is unnecessary.`
			`*/`
			`}`

			`/*`
			`* sort_free()`
			`*`
			`* Releases all memory consumed by a sort_info object.`
			`*/`
			`EXPORT void`
			`sort_free(i)`
			`sort_info *i;`
			`{`
			`_pfree(i->revindex);`
			`_pfree(i->head);`
			`_pfree(i->bucketusage);`
			`_pfree(i);`
			`}`

			`/*`
			`* sort_sort() (internal)`
			`*`
			`* This function builds the index to allow for fast searching for sample`
			`* values within a portion (sortlo - sorthi) of the object's associated`
			`* vector. It is called internally and only when needed.`
			`*/`
			`LOCAL void`
			`sort_sort(i, sortlo, sorthi)`
			`sort_info *i;`
			`long sortlo;`
			`long sorthi;`
			`{`
			`long j;`

			`/*`
			`* We walk backward through the range to index because we insert new`
			`* samples at the head of each bucket's list. At the end, they'll be`
			`* sorted from first to last occurrence.`
			`*/`
			`for (j = sorthi - 1; j >= sortlo; j--) {`
			`/*`
			`* i->vector[j] = the signed 16-bit sample to index.`
			`* hv = pointer to the head of the sorted list of occurences`
			`* of this sample`
			`* l = the node to associate with this sample`
			`*`
			`* We add 32768 to convert the signed 16-bit integer to an unsigned`
			`* range from 0 to 65535.`
			`*`
			`* Note that l is located within i->revindex at a position`
			`* corresponding to the sample's position in the vector. This allows`
			`* ipos() to determine the sample position from a returned sort_link.`
			`*/`
			`sort_link **hv = i->head + i->vector[j] + 32768;`
			`sort_link *l = i->revindex + j;`

			`/*`
			`* If this is the first time we've encountered this sample, add its`
			`* bucket to the list of buckets used. This list is used only for`
			`* resetting the index quickly.`
			`*/`
			`if (*hv == NULL) {`
			`i->bucketusage[i->lastbucket] = i->vector[j] + 32768;`
			`i->lastbucket++;`
			`}`
			`/*`
			`* Point the new node at the old head, then assign the new node as`
			`* the new head.`
			`*/`
			`l->next = *hv;`
			`*hv = l;`
			`}`
			`/*`
			`* Mark the index as initialized.`
			`*/`
			`i->sortbegin = 0;`
			`}`

			`/*`
			`* sort_setup()`
			`*`
			`* This function initializes a previously allocated sort_info_t. The`
			`* sort_info_t is associated with a vector of samples of length`
			`* (size), whose position begins at (*abspos) within the CD's stream`
			`* of samples. Only the range of samples between (sortlo, sorthi)`
			`* will eventually be indexed for fast searching. (sortlo, sorthi)`
			`* are absolute sample positions.`
			`*`
			`* Note: size must be <= i->maxsize given to the preceding sort_alloc(),`
			`* but no error checking is done here.`
			`*/`
			`EXPORT void`
			`sort_setup(i, vector, abspos, size, sortlo, sorthi)`
			`sort_info *i;`
			`Int16_t *vector;`
			`long *abspos;`
			`long size;`
			`long sortlo;`
			`long sorthi;`
			`{`
			`/*`
			`* Reset the index if it has already been built.`
			`*/`
			`if (i->sortbegin != -1)`
			`sort_unsortall(i);`

			`i->vector = vector;`
			`i->size = size;`
			`i->abspos = abspos;`

			`/*`
			`* Convert the absolute (sortlo, sorthi) to offsets within the vector.`
			`* Note that the index will not be built until sort_getmatch() is called.`
			`* Here we're simply hanging on to the range to index until then.`
			`*/`
			`i->lo = min(size, max(sortlo - *abspos, 0));`
			`i->hi = max(0, min(sorthi - *abspos, size));`
			`}`

			`/*`
			`* sort_getmatch()`
			`*`
			`* This function returns a sort_link_t pointer which refers to the`
			`* first sample equal to (value) in the vector. It only searches for`
			`* hits within (overlap) samples of (post), where (post) is an offset`
			`* within the vector. The caller can determine the position of the`
			`* matched sample using ipos(sort_info , sort_link ).`
			`*`
			`* This function returns NULL if no matches were found.`
			`*/`
			`EXPORT sort_link *`
			`sort_getmatch(i, post, overlap, value)`
			`sort_info *i;`
			`long post;`
			`long overlap;`
			`int value;`
			`{`
			`sort_link *ret;`

			`/*`
			`* If the vector hasn't been indexed yet, index it now.`
			`*/`
			`if (i->sortbegin == -1)`
			`sort_sort(i, i->lo, i->hi);`
			`/*`
			`* Now we reuse lo and hi`
			`*`
			`* We'll only return samples within (overlap) samples of (post).`
			`* Clamp the boundaries to search to the boundaries of the array,`
			`* convert the signed sample to an unsigned offset, and store the`
			`* state so that future calls to sort_nextmatch do the right thing.`
			`*`
			`* Reusing lo and hi this way is awful.`
			`*/`
			`post = max(0, min(i->size, post));`
			`i->val = value + 32768;`
			`i->lo = max(0, post - overlap); /* absolute position */`
			`i->hi = min(i->size, post + overlap); /* absolute position */`

			`/*`
			`* Walk through the linked list of samples with this value, until`
			`* we find the first one within the bounds specified. If there`
			`* aren't any, return NULL.`
			`*/`
			`ret = i->head[i->val];`
			`while (ret) {`
			`/*`
			`* ipos() calculates the offset (in terms of the original vector)`
			`* of this hit.`
			`*/`
			`if (ipos(i, ret) < i->lo) {`
			`ret = ret->next;`
			`} else {`
			`if (ipos(i, ret) >= i->hi)`
			`ret = NULL;`
			`break;`
			`}`
			`}`
			`/* i->head[i->val]=ret; */`
			`return (ret);`
			`}`

			`/*`
			`* sort_nextmatch()`
			`*`
			`* This function returns a sort_link_t pointer which refers to the next sample`
			`* matching the criteria previously passed to sort_getmatch(). See`
			`* sort_getmatch() for details.`
			`*`
			`* This function returns NULL if no further matches were found.`
			`*/`
			`EXPORT sort_link *`
			`sort_nextmatch(i, prev)`
			`sort_info *i;`
			`sort_link *prev;`
			`{`
			`sort_link *ret = prev->next;`

			`/*`
			`* If there aren't any more hits, or we've passed the boundary requested`
			`* of sort_getmatch(), we're done.`
			`*/`
			`if (!ret \|\| ipos(i, ret) >= i->hi)`
			`return (NULL);`
			`return (ret);`
			`}`