Divider
  Speech Technology and Research Laboratory
  People
  Current Research Activities
  Past Research Activities
  Publications
  Career Opportunities
  Seminars
  Technologies for License
  In the News
  Contact Us
  STAR Search
  Information and Computing Sciences Division
SpacerAbout UsDividerR and D DivisionsDividerCareersDividerNewsroomDividerContact UsDividerSRI HomeSpacer

Spacer
         
  SRI Logo

Search SRILM-USER Archives

Match: Format: Sort by:
Search:

Re: Modifying ngram.cc

From: lambert mathias <lambert at ADDRESS HIDDEN>
Date: Tue, 19 Jul 2005 17:04:19 -0400

Hi,

I tried using the copy constructors in srilm-1.4.5.
I initialized my LM as
Ngram useLM(ngramLM);
However, when I do a useLM->write(), the output shows some of the entried as
(null).

\data\
ngram 1=6
ngram 2=24
ngram 3=96

\1-grams:
0       (null)  -99
0       (null)  -99
-0.6083246      3       0
-0.6093718      4       0
-1.78533        </s>
-99     <s>     -99

\2-grams:
0       3 (null)        -99
0       3 (null)        -99
-0.6108742      3 3     0
-0.6078538      3 4     3.818703
-1.778024       3 </s>
............

I am trying to figure out if there is a bug in the copy constructor
initialization.

Lambert

On 7/17/05 8:47 PM, "Andreas Stolcke" <stolcke at ADDRESS HIDDEN> wrote:

> The problem was that copy constructors (and assignment operators)
> for the container data structures (Array, SArray, LHash) weren't implemented.
> I fixed that in the current development version.
> No changes to any of the higher-level classes (like Ngram) is needed,
> as C++ automatically (and recursively) defines copy constructors based on the
> member copy constructors.
>
> You can either replace the attached source files (in dstruct/src), or
> download the "1.4.5 (Beta)" version from the web page.
>
> --Andreas
>
> In message <BEF415D2.332F%lambert at ADDRESS HIDDEN>you wrote:
>> Hi,
>>
>> This is kind of a C++ question
>> I wrote the following copy constructor in Ngram.h
>> Ngram(const Ngram
>> &ng):LM(ng.vocab),contexts(ng.contexts),order(ng.order),_skipOOVs(ng._skipOO
>> Vs),_trustTotals(ng._trustTotals){};
>>
>> I have the following declarations
>>
>> Ngram ngramLM(vocab*,order);
>>
>> While(//<some condition>)
>>     Ngram useLM(ngramLM);
>>
>>     // Do some stuff with useLM
>>     .........
>>     ........
>> }
>>
>> The problem is that the assignment useLM=ngramLM doesn't assign the original
>> ngramLM. Any changes I make to useLM shows up in ngramLM too.
>> I just want to make a copy (useLM) of the original ngramLM, work on that
>> copy and then reinitialize another useLM with the original ngramLM.
>>
>> Any thoughts?
>>
>>
>> Lambert
>>
>
> /*
>  * Array.h --
>  * Extensible array class
>  *
>  * Copyright (c) 1995-2005 SRI International.  All Rights Reserved.
>  *
>  * @(#)$Header: /home/srilm/devel/dstruct/src/RCS/Array.h,v 1.11 2005/07/17
> 22:12:21 stolcke Exp $
>  *
>  */
>
> #ifndef _Array_h_
> #define _Array_h_
>
> #include <assert.h>
>
> #include "MemStats.h"
>
> template <class DataT>
> class Array
> {
> public:
>     Array(int base = 0, unsigned int size = 0)
> : _base(base), _size(size), _data(0), alloc_size(0)
> { if (size > 0) { alloc(size-1); } };
>     Array(Array<DataT> &source)
> : _base(source._base), _size(0), _data(0), alloc_size(0)
> { *this = source; }
>
>     ~Array() { delete [] _data; } ;
>
>     DataT &operator[](int index)
> { int offset = index - _base; assert(offset >= 0);
>  if (offset >= _size) {
>    _size = offset + 1;
>    if (offset >= alloc_size) { alloc(offset); }
>  }
>  return _data[offset];
> };
>
>     Array<DataT> & operator= (const Array<DataT> &other);
>
>     DataT *data() const { return _data - _base; };
>     int base() const { return _base; }
>     unsigned int size() const { return _size; }
>
>     void memStats(MemStats &stats) const;
>
> private:
>     unsigned int _size;  /* used size */
>     int _base;
>     DataT *_data;
>     unsigned int alloc_size; /* allocated size */
>     void alloc(unsigned int size);
> };
>
> #endif /* _Array_h_ */
> /*
>  * LHash.cc --
>  * Linear search hash table implementation
>  *
>  */
>
> #ifndef _LHash_cc_
> #define _LHash_cc_
>
> #ifndef lint
> static char LHash_Copyright[] = "Copyright (c) 1995-2005 SRI International.
> All Rights Reserved.";
> static char LHash_RcsId[] = "@(#)$Header:
> /home/srilm/devel/dstruct/src/RCS/LHash.cc,v 1.46 2005/07/17 22:01:31 stolcke
> Exp $";
> #endif
>
> #include <iostream.h>
> #include <stdlib.h>
> #include <string.h>
> #include <assert.h>
> #include <new.h>
>
> #include "LHash.h"
>
> #undef INSTANTIATE_LHASH
> #define INSTANTIATE_LHASH(KeyT, DataT) \
> template <> DataT *LHash< KeyT, DataT >::removedData = 0; \
> template class LHash< KeyT, DataT >; \
> template class LHashIter< KeyT, DataT >
>
> #ifndef __GNUG__
> template <class KeyT, class DataT>
> DataT *LHash<KeyT,DataT>::removedData = 0;
> #endif /* __GNUG__ */
>
> const unsigned minHashBits = 3;  /* minimum no. bits for hashing
> * tables smaller than this use linear
> * search to save space */
> const float fillRatio = 0.8;  /* fill ration at which the table is
> * expanded and rehashed */
>
> #define BODY(b) ((LHashBody<KeyT,DataT> *)b)
>
> /*
>  * Dump the entire hash array to cerr.  Unused slots are printed as "FREE".
>  */
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::dump() const
> {
>     if (body) {
> unsigned maxEntries = hashSize(BODY(body)->maxBits);
> unsigned i;
>
> for (i = 0; i < maxEntries; i++) {
>    cerr << " " << i << ": ";
>    if (Map_noKeyP(BODY(body)->data[i].key)) {
> cerr << "FREE";
>    } else {
> cerr << BODY(body)->data[i].key
> #ifdef DUMP_VALUES
> /*
> * Only certain types can be printed.
> */
>     << "->" << BODY(body)->data[i].value
> #endif /* DUMP_VALUES */
>     ;
>    }
> }
>     } else {
> cerr << "EMPTY";
>     }
>     cerr << endl;
> }
>
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::memStats(MemStats &stats) const
> {
>     stats.total += sizeof(*this);
>     if (body) {
>         unsigned maxEntries = hashSize(BODY(body)->maxBits);
>
> stats.total += sizeof(*BODY(body)) +
> sizeof(BODY(body)->data[0]) *
> (maxEntries - 1);
> stats.wasted += sizeof(BODY(body)->data[0]) *
> (maxEntries - BODY(body)->nEntries);
>     }
> }
>
> /*
>  * Compute the actual minimum size required for a given number of entries
>  */
> static inline unsigned
> roundSize(unsigned size)
> {
>     if (size < hashSize(minHashBits)) {
> return size;
>     } else {
> return (unsigned)((size + 1)/ fillRatio);
>     }
> }
>
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::alloc(unsigned size)
> {
>     unsigned maxBits, maxEntries;
>     unsigned i;
>
>     /*
>      * round up to power of two
>      */
>     maxBits = 0;
>     while (hashSize(maxBits) < size) {
> assert(maxBits < LHash_maxBitLimit);
> maxBits++;
>     }
>
>     maxEntries = hashSize(maxBits);
>
>     //cerr << "LHash::alloc: current " << (body ? BODY(body)->nEntries : 0)
>     //  << ", requested " << size
>     //  << ", allocating " << maxEntries << " (" << maxBits << ")\n";
>
>     body = (LHashBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
>       (maxEntries - 1) * sizeof(BODY(body)->data[0]));
>     assert(body != 0);
>
>     BODY(body)->maxBits = maxBits;
>     BODY(body)->nEntries = 0;
>
>     for (i = 0; i < maxEntries; i++) {
> Map_noKey(BODY(body)->data[i].key);
>     }
>     //cerr << "memory for header = " <<
>     //  ((void *)&(BODY(body)->data[0]) - (void *)BODY(body)) << endl;
>     //cerr << "memory per entry = " <<
>     //  ((void *)&(BODY(body)->data[3]) - (void *)&(BODY(body)->data[2])) <<
> endl;
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT>::LHash(unsigned size)
>     : body(0)
> {
>     if (size != 0) {
> /*
> * determine actual initial size
> */
> alloc(roundSize(size));
>     }
> }
>
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::clear(unsigned size)
> {
>     if (body) {
> unsigned maxEntries = hashSize(BODY(body)->maxBits);
> unsigned i;
>
> for (i = 0; i < maxEntries; i++) {
>    if (! Map_noKeyP(BODY(body)->data[i].key)) {
> Map_freeKey(BODY(body)->data[i].key);
>    }
> }
> free(body);
> body = 0;
>     }
>     if (size != 0) {
> alloc(roundSize(size));
>     }
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT>::~LHash()
> {
>     clear(0);
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT>::LHash(const LHash<KeyT,DataT> &source)
>     : body(0)
> {
> #ifdef DEBUG
>     cerr << "warning: LHash copy constructor called\n";
> #endif
>     *this = source;
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT> &
> LHash<KeyT,DataT>::operator= (const LHash<KeyT,DataT> &other)
> {
> #ifdef DEBUG
>     cerr << "warning: LHash::operator= called\n";
> #endif
>
>     if (&other == this) {
> return *this;
>     }
>
>     /*
>      * copy hash entries from old to new
>      */
>     if (other.body) {
> unsigned maxEntries = hashSize(BODY(other.body)->maxBits);
> clear(maxEntries);
>
> for (unsigned i = 0; i < maxEntries; i++) {
>    KeyT thisKey = BODY(other.body)->data[i].key;
>
>    if (!Map_noKeyP(thisKey)) {
> /*
> * Copy key
> */
> BODY(body)->data[i].key = Map_copyKey(thisKey);
>
> /*
> * Initialize data, required for = operator
> */
> new (&(BODY(body)->data[i].value)) DataT;
>
> /*
> * Copy data
> */
> BODY(body)->data[i].value = BODY(other.body)->data[i].value;
>    }
> }
> BODY(body)->nEntries = BODY(other.body)->nEntries;
>     } else {
> clear(0);
>     }
>
>     return *this;
> }
>
> /*
>  * Returns index into data array that has the key which is either
>  * equal to key, or indicates the place where key would be placed if it
>  * occurred in the array.
>  */
> template <class KeyT, class DataT>
> Boolean
> LHash<KeyT,DataT>::locate(KeyT key, unsigned &index) const
> {
>     assert(!Map_noKeyP(key));
>
>     if (body) {
> unsigned maxBits = BODY(body)->maxBits;
> register MapEntry<KeyT,DataT> *data = BODY(body)->data;
>
> if (maxBits < minHashBits) {
>    /*
>     * Do a linear search
>     */
>    unsigned nEntries = BODY(body)->nEntries;
>    register unsigned i;
>
>    for (i = 0; i < nEntries; i++) {
> if (LHash_equalKey(data[i].key, key)) {
>    index = i;
>    return true;
> }
>    }
>    index = i;
>    return false;
> } else {
>    /*
>     * Do a hashed search
>     */
>    unsigned hash = LHash_hashKey(key, maxBits);
>    unsigned i;
>
>    for (i = hash; ; i = (i + 1) & hashMask(maxBits))
>    {
> if (Map_noKeyP(data[i].key)) {
>    index = i;
>    return false;
> } else if (LHash_equalKey(data[i].key, key)) {
>    index = i;
>    return true;
> }
>    }
> }
>     } else {
> return false;
>     }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHash<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
> {
>     unsigned index;
>
>     if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
>     } else {
> return 0;
>     }
> }
>
> template <class KeyT, class DataT>
> KeyT
> LHash<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
> {
>     unsigned index;
>     static KeyT zeroKey;
>
>     if (foundP = locate(key, index)) {
> return BODY(body)->data[index].key;
>     } else {
> return zeroKey;
>     }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHash<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
> {
>     unsigned index;
>
>     assert(!(Map_noKeyP(key)));
>
>     /*
>      * Make sure there is room for at least one entry
>      */
>     if (body == 0) {
> alloc(1);
>     }
>
>     if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
>     } else {
> unsigned maxEntries = hashSize(BODY(body)->maxBits);
> unsigned nEntries = BODY(body)->nEntries;
>
> /*
> * Rehash table if necessary
> */
> unsigned minSize = roundSize(nEntries + 1);
>
> if (minSize > maxEntries) {
>    LHashBody<KeyT,DataT> *oldBody = BODY(body);
>    unsigned i;
>
>    /*
>     * Since LHash_maxEntriesLimit is a power of two minus 1
>     * we need to check this only when the array is enlarged
>     */
>    assert(nEntries < LHash_maxEntriesLimit);
>
>    alloc(minSize);
>    BODY(body)->nEntries = nEntries;
>
>    if (BODY(body)->maxBits < minHashBits) {
> /*
> * Just copy old entries to new storage, no reindexing
> * required.
> */
> memcpy(BODY(body)->data, oldBody->data,
> sizeof(oldBody->data[0]) * nEntries);
>    } else {
> /*
> * Rehash
> */
> for (i = 0; i < maxEntries; i++) {
>    KeyT key = oldBody->data[i].key;
>
>    if (! Map_noKeyP(key)) {
> (void)locate(key, index);
> memcpy(&(BODY(body)->data[index]), &(oldBody->data[i]),
> sizeof(oldBody->data[0]));
>    }
> }
>    }
>    free(oldBody);
>
>    /*
>     * Entries have been moved, so have to re-locate key
>     */
>    (void)locate(key, index);
> }
>
> BODY(body)->data[index].key = Map_copyKey(key);
>
> /*
> * Initialize data to zero, but also call constructors, if any
> */
> memset(&(BODY(body)->data[index].value), 0,
> sizeof(BODY(body)->data[index].value));
> new (&(BODY(body)->data[index].value)) DataT;
>
> BODY(body)->nEntries++;
>
> return &(BODY(body)->data[index].value);
>     }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHash<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
> {
>     unsigned index;
>
>     /*
>      * Allocate pseudo-static memory for removed objects (returned by
>      * remove()). We cannot make this static because otherwise
>      * the destructor for DataT would be called on it at program exit.
>      */
>     if (removedData == 0) {
> removedData = (DataT *)malloc(sizeof(DataT));
> assert(removedData != 0);
>     }
>
>     if (foundP = locate(key, index)) {
> Map_freeKey(BODY(body)->data[index].key);
> Map_noKey(BODY(body)->data[index].key);
> memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));
>
> if (BODY(body)->maxBits < minHashBits) {
>    /*
>     * Linear search mode -- Just move all entries above the
>     * the one removed to fill the gap.
>     */
>    unsigned nEntries = BODY(body)->nEntries;
>
>    memmove(&BODY(body)->data[index],
>    &BODY(body)->data[index+1],
>    (nEntries - index - 1) * sizeof(BODY(body)->data[0]));
>    Map_noKey(BODY(body)->data[nEntries - 1].key);
> } else {
>    /*
>     * The entry after the one being deleted could actually
>     * belong to a prior spot in the table, but was bounced forward due
>     * to a collision.   The invariant used in lookup is that
>     * all locations between the hash index and the actual index are
>     * filled.  Hence we examine all entries between the deleted
>     * position and the next free position as whether they need to
>     * be moved backward.
>     */
>    while (1) {
> unsigned newIndex;
>
> index = (index + 1) & hashMask(BODY(body)->maxBits);
>
> if (Map_noKeyP(BODY(body)->data[index].key)) {
>    break;
> }
>
> /*
> * If find returns false that means the deletion has
> * introduced a hole in the table that would prevent
> * us from finding the next entry. Luckily, find
> * also tells us where the hole is.  We move the
> * entry to its rightful place and continue filling
> * the hole created by this move.
> */
> if (!locate(BODY(body)->data[index].key, newIndex)) {
>    memcpy(&(BODY(body)->data[newIndex]),
>   &(BODY(body)->data[index]),
>   sizeof(BODY(body)->data[0]));
>    Map_noKey(BODY(body)->data[index].key);
> }
>    }
> }
> BODY(body)->nEntries--;
> return removedData;
>     } else {
> return 0;
>     }
> }
>
> #ifdef __GNUG__
> static
> #endif
> int (*LHash_thisKeyCompare)();  /* used by LHashIter() to communicate
> * with compareIndex() */
> #ifdef __GNUG__
> static
> #endif
> void *LHash_thisBody;   /* ditto */
>
> template <class KeyT, class DataT>
> void
> LHashIter<KeyT,DataT>::sortKeys()
> {
>     /*
>      * Store keys away and sort them to the user's orders.
>      */
>     unsigned maxEntries = hashSize(myLHashBody->maxBits);
>
>     unsigned *sortedIndex = new unsigned[numEntries];
>     assert(sortedIndex != 0);
>
>     unsigned i;
>
>     unsigned j = 0;
>     for (i = 0; i < maxEntries; i++) {
> if (!Map_noKeyP(myLHashBody->data[i].key)) {
>    sortedIndex[j++] = i;
> }
>     }
>     assert(j == numEntries);
>
>     /*
>      * Due to the limitations of the qsort interface we have to
>      * pass extra information to compareIndex in these global
>      * variables - yuck.
>      */
>     LHash_thisKeyCompare = (int(*)())sortFunction;
>     LHash_thisBody = myLHashBody;
>     qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);
>
>     /*
>      * Save the keys for enumeration.  The reason we save the keys,
>      * not the indices, is that indices may change during enumeration
>      * due to deletions.
>      */
>     sortedKeys = new KeyT[numEntries];
>     assert(sortedKeys != 0);
>
>     for (i = 0; i < numEntries; i++) {
> sortedKeys[i] = myLHashBody->data[sortedIndex[i]].key;
>     }
>
>     delete [] sortedIndex;
> }
>
> template <class KeyT, class DataT>
> LHashIter<KeyT,DataT>::LHashIter(const LHash<KeyT,DataT> &lhash,
>    int (*keyCompare)(KeyT, KeyT))
>     : myLHashBody(BODY(lhash.body)), current(0),
>       numEntries(lhash.numEntries()), sortFunction(keyCompare)
> {
>     /*
>      * Note: we access the underlying LHash through the body pointer,
>      * not the top-level LHash itself.  This allows the top-level object
>      * to be moved without affecting an ongoing iteration.
>      * XXX: This only works because
>      * - it is illegal to insert while iterating
>      * - deletions don't cause reallocation of the data
>      */
>     if (sortFunction && myLHashBody) {
> sortKeys();
>     } else {
> sortedKeys = 0;
>     }
> }
>
> /*
>  * This is the callback function passed to qsort for comparing array
>  * entries. It is passed the indices into the data array, which are
>  * compared according to the user-supplied function applied to the
>  * keys found at those locations.
>  */
> template <class KeyT, class DataT>
> int
> LHashIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
> {
>     return (*(compFnType)LHash_thisKeyCompare)
>    (BODY(LHash_thisBody)->data[*(unsigned *)idx1].key,
>     BODY(LHash_thisBody)->data[*(unsigned *)idx2].key);
> }
>
> template <class KeyT, class DataT>
> LHashIter<KeyT,DataT>::~LHashIter()
> {
>     delete [] sortedKeys;
> }
>
>
> template <class KeyT, class DataT>
> void
> LHashIter<KeyT,DataT>::init()
> {
>     delete [] sortedKeys;
>
>     current = 0;
>
>     {
> /*
> * XXX: fake LHash object to access numEntries()
> */
> LHash<KeyT,DataT> myLHash(0);
>
> myLHash.body = myLHashBody;
> numEntries = myLHash.numEntries();
> myLHash.body = 0;
>     }
>
>     if (sortFunction && myLHashBody) {
> sortKeys();
>     } else {
> sortedKeys = 0;
>     }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHashIter<KeyT,DataT>::next(KeyT &key)
> {
>
>     if (myLHashBody == 0) {
> return 0;
>     } else {
> unsigned index;
>
> if (sortedKeys) {
>    /*
>     * Sorted enumeration -- advance to next key in sorted list
>     */
>    if (current == numEntries) {
> return 0;
>    }
>
>    /*
>     * XXX: fake LHash object to access locate()
>     */
>    LHash<KeyT,DataT> myLHash(0);
>
>    myLHash.body = myLHashBody;
>    myLHash.locate(sortedKeys[current++], index);
>    myLHash.body = 0;;
> } else {
>    /*
>     * Detect deletions by comparing old and current number of
>     * entries.
>     * A legal deletion can only affect the current entry, so
>     * adjust the current position to reflect the next entry was
>     * moved.
>     */
>    if (myLHashBody->nEntries != numEntries) {
> assert(myLHashBody->nEntries == numEntries - 1);
>
> numEntries --;
> current --;
>    }
>
>    /*
>     * Random enumeration mode
>     */
>    unsigned maxBits = myLHashBody->maxBits;
>
>    if (maxBits < minHashBits) {
> /*
> * Linear search mode - advance one to the next entry
> */
> if (current == numEntries) {
>    return 0;
> }
>    } else {
> unsigned maxEntries = hashSize(maxBits);
>
> while (current < maxEntries &&
>       Map_noKeyP(myLHashBody->data[current].key))
> {
>    current++;
> }
>
> if (current == maxEntries) {
>    return 0;
> }
>    }
>
>    index = current ++;
> }
>
> key = myLHashBody->data[index].key;
> return &(myLHashBody->data[index].value);
>     }
> }
>
> #undef BODY
>
> #endif /* _LHash_cc_ */
> /*
>  * SArray.cc --
>  * Sorted array implementation
>  *
>  */
>
> #ifndef _SArray_cc_
> #define _SArray_cc_
>
> #ifndef lint
> static char SArray_Copyright[] = "Copyright (c) 1995-2005 SRI International.
> All Rights Reserved.";
> static char SArray_RcsId[] = "@(#)$Header:
> /home/srilm/devel/dstruct/src/RCS/SArray.cc,v 1.37 2005/07/17 22:01:31 stolcke
> Exp $";
> #endif
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <assert.h>
> #include <new.h>
>
> #include "SArray.h"
>
> #undef INSTANTIATE_SARRAY
> #define INSTANTIATE_SARRAY(KeyT, DataT) \
> template <> DataT *SArray< KeyT, DataT >::removedData = 0; \
> template class SArray< KeyT, DataT >; \
> template class SArrayIter< KeyT, DataT >
>
> #ifndef __GNUG__
> template <class KeyT, class DataT>
> DataT *SArray<KeyT,DataT>::removedData = 0;
> #endif /* __GNUG__ */
>
> #define BODY(b) ((SArrayBody<KeyT,DataT> *)b)
>
> const double growSize = 1.1;
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::dump() const
> {
>     if (body == 0) {
> cerr << "EMPTY" << endl;
>     } else {
> unsigned nEntries = numEntries();
>
> cerr << "maxEntries = " << BODY(body)->maxEntries;
> cerr << " nEntries = " << nEntries;
>
> for (unsigned i = 0; i < nEntries; i++) {
>    cerr << " " << i << ": ";
>    cerr << BODY(body)->data[i].key
> #ifdef DUMP_VALUES
> /*
> * Only certain types can be printed.
> */
>     << "->" << BODY(body)->data[i].value
> #endif /* DUMP_VALUES */
>     ;
> }
>     }
> }
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::memStats(MemStats &stats) const
> {
>     stats.total += sizeof(*this);
>
>     if (body) {
> unsigned maxEntries = BODY(body)->maxEntries;
>
> stats.total += sizeof(*BODY(body)) +
> sizeof(BODY(body)->data[0]) *
> (maxEntries - 1);
> stats.wasted += sizeof(BODY(body)->data[0]) *
> (maxEntries - numEntries());
>     }
> }
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::alloc(unsigned size)
> {
>     body = (SArrayBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
>   (size - 1) * sizeof(BODY(body)->data[0]));
>     assert(body != 0);
>
>     BODY(body)->deleted = 0;
>     BODY(body)->maxEntries = size;
>
>     /*
>      * Fill the array with dummy keys, marking the unused space
>      */
>     for (unsigned i = 0; i < size; i ++) {
> Map_noKey(BODY(body)->data[i].key);
>     }
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT>::SArray(unsigned size)
>     : body(0)
> {
>     if (size != 0) {
> alloc(size);
>     }
> }
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::clear(unsigned size)
> {
>     if (body) {
> unsigned maxEntries = BODY(body)->maxEntries;
>
> for (unsigned i = 0; i < maxEntries; i++) {
>    KeyT thisKey = BODY(body)->data[i].key;
>
>    if (Map_noKeyP(thisKey)) {
> break;
>    }
>    Map_freeKey(thisKey);
> }
> free(body);
> body = 0;
>     }
>     if (size != 0) {
> alloc(size);
>     }
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT>::~SArray()
> {
>     clear(0);
> }
>
> template <class KeyT, class DataT>
> unsigned
> SArray<KeyT,DataT>::numEntries() const
> {
>     if (body == 0) {
> return 0;
>     } else if (Map_noKeyP(BODY(body)->data[0].key)) {
> return 0;
>     } else {
> /*
> * Do a binary search for the end of the used memory
> * lower always points to a filled entry
> * upper always points to a free entry beyond the end of used entries
> */
> unsigned lower, upper;
>
> lower = 0;   /* minimum index (inclusive) */
> upper = BODY(body)->maxEntries; /* maximum index (exclusive) */
>
> while (lower + 1 < upper) {
>    unsigned middle = (lower + upper)/2;
>
>    if (Map_noKeyP(BODY(body)->data[middle].key)) {
>    upper = middle;
>    } else {
>    lower = middle;
>    }
> }
>
> /* lower + 1 == upper */
> return upper;
>     }
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT>::SArray(const SArray<KeyT,DataT> &source)
>     : body(0)
> {
> #ifdef DEBUG
>     cerr << "warning: SArray copy constructor called\n";
> #endif
>     *this = source;
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT> &
> SArray<KeyT,DataT>::operator= (const SArray<KeyT,DataT> &other)
> {
> #ifdef DEBUG
>     cerr << "warning: SArray::operator= called\n";
> #endif
>
>     if (&other == this) {
> return *this;
>     }
>
>     /*
>      * copy array entries from old to new
>      */
>     if (other.body) {
> unsigned maxEntries = BODY(other.body)->maxEntries;
> clear(maxEntries);
>
> for (unsigned i = 0; i < maxEntries; i++) {
>    KeyT thisKey = BODY(other.body)->data[i].key;
>
>    if (Map_noKeyP(thisKey)) {
> break;
>    }
>
>    /*
>     * Copy key
>     */
>    BODY(body)->data[i].key = Map_copyKey(thisKey);
>
>    /*
>     * Initialize data, required for = operator
>     */
>    new (&(BODY(body)->data[i].value)) DataT;
>
>    /*
>     * Copy data
>     */
>    BODY(body)->data[i].value = BODY(other.body)->data[i].value;
>
> }
>     } else {
> clear(0);
>     }
>
>     return *this;
> }
>
> /*
>  * Returns index into data array that has the key which is either
>  * equal to key, or indicates the place where key would be placed if it
>  * occurred in the array.
>  */
> template <class KeyT, class DataT>
> Boolean
> SArray<KeyT,DataT>::locate(KeyT key, unsigned &index) const
> {
>     assert(!Map_noKeyP(key));
>
>     if (body) {
> unsigned lower, upper;
>
> lower = 0;   /* minimum index (inclusive) */
> upper = BODY(body)->maxEntries; /* maximum index (exclusive) */
>
> while (lower < upper) {
>    unsigned middle = (lower + upper)/2;
>
>    KeyT thisKey = BODY(body)->data[middle].key;
>
>    if (Map_noKeyP(thisKey)) {
> upper = middle;
> continue;
>    }
>    
>    int compare = SArray_compareKey(key, thisKey);
>
>    if (compare < 0) {
>    upper = middle;
>    } else if (compare > 0) {
>    lower = middle + 1;
>    } else {
>    index = middle;
>    return true;
>    }
> }
>
> /* we have lower == upper */
> if (lower == BODY(body)->maxEntries ||
>    Map_noKeyP(BODY(body)->data[lower].key) ||
>    SArray_compareKey(key, BODY(body)->data[lower].key) < 0)
> {
>    index = lower;
> } else  {
>    index = lower + 1;
> }
> return false;
>     } else {
> return false;
>     }
> }
>
> template <class KeyT, class DataT>
> DataT *
> SArray<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
> {
>     unsigned index;
>
>     if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
>     } else {
> return 0;
>     }
> }
>
> template <class KeyT, class DataT>
> KeyT
> SArray<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
> {
>     unsigned index;
>     static KeyT zeroKey;
>
>     if (foundP = locate(key, index)) {
> return BODY(body)->data[index].key;
>     } else {
> return zeroKey;
>     }
> }
>
> template <class KeyT, class DataT>
> DataT *
> SArray<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
> {
>     unsigned index;
>
>     /*
>      * Make sure there is room for at least one entry
>      */
>     if (body == 0) {
> alloc(1);
>     }
>
>     if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
>     } else {
> unsigned nEntries = numEntries();
> unsigned maxEntries = BODY(body)->maxEntries;
>
> /*
> * Need to add an element.
> * First, enlarge array if necessary
> */
> if (nEntries == maxEntries) {
>    maxEntries = (int)(maxEntries * growSize);
>    if (maxEntries == nEntries) {
> maxEntries ++;
>    }
>    body = (SArrayBody<KeyT,DataT> *)realloc(body,
> sizeof(*BODY(body)) +
>   (maxEntries - 1) * sizeof(BODY(body)->data[0]));
>    assert(body != 0);
>
>    /*
>     * Fill new space with dummy keys
>     */
>    for (unsigned i = BODY(body)->maxEntries; i < maxEntries; i ++) {
> Map_noKey(BODY(body)->data[i].key);
>    }
>
>    BODY(body)->maxEntries = maxEntries;
> }
>
> memmove(&(BODY(body)->data[index + 1]),
> &(BODY(body)->data[index]),
> (nEntries - index) * sizeof(BODY(body)->data[0]));
>
> BODY(body)->data[index].key = Map_copyKey(key);
>
> /*
> * Initialize data to zero, but also call constructors, if any
> */
> memset(&(BODY(body)->data[index].value), 0,
>   sizeof(BODY(body)->data[index].value));
> new (&(BODY(body)->data[index].value)) DataT;
>
> return &(BODY(body)->data[index].value);
>     }
> }
>  
> template <class KeyT, class DataT>
> DataT *
> SArray<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
> {
>     unsigned index;
>
>     /*
>      * Allocate pseudo-static memory for removed objects (returned by
>      * remove()). We cannot make this static because otherwise
>      * the destructor for DataT would be called on it at program exit.
>      */
>     if (removedData == 0) {
> removedData = (DataT *)malloc(sizeof(DataT));
> assert(removedData != 0);
>     }
>
>     if (foundP = locate(key, index)) {
> unsigned nEntries = numEntries();
>
> Map_freeKey(BODY(body)->data[index].key);
> memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));
>
> memmove(&(BODY(body)->data[index]),
> &(BODY(body)->data[index + 1]),
> (nEntries - index - 1) * sizeof(BODY(body)->data[0]));
>
> /*
> * mark previous last slot as free
> */
> Map_noKey(BODY(body)->data[nEntries-1].key);
>
> /*
> * Indicate that deletion occurred
> */
> BODY(body)->deleted = 1;
>
> return removedData;
>     } else {
> return 0;
>     }
> }
>  
> #ifdef __GNUG__
> static
> #endif
> int (*SArray_thisKeyCompare)();  /* used by SArrayIter() to communicate
> * with compareIndex() */
> #ifdef __GNUG__
> static
> #endif
> void *SArray_thisBody;   /* ditto */
>
>
> template <class KeyT, class DataT>
> void
> SArrayIter<KeyT,DataT>::sortKeys()
> {
>     /*
>      * Store keys away and sort them to the user's orders.
>      */
>     unsigned *sortedIndex = new unsigned[numEntries];
>     assert(sortedIndex != 0);
>
>     unsigned i;
>     for (i = 0; i < numEntries; i++) {
> sortedIndex[i] = i;
>     }
>
>     /*
>      * Due to the limitations of the qsort interface we have to
>      * pass extra information to compareIndex in these global
>      * variables - yuck.
>      */
>     SArray_thisKeyCompare = (int (*)())sortFunction;
>     SArray_thisBody = mySArrayBody;
>     qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);
>
>     /*
>      * Save the keys for enumeration.  The reason we save the keys,
>      * not the indices, is that indices may change during enumeration
>      * due to deletions.
>      */
>     sortedKeys = new KeyT[numEntries];
>     assert(sortedKeys != 0);
>
>     for (i = 0; i < numEntries; i++) {
> sortedKeys[i] = mySArrayBody->data[sortedIndex[i]].key;
>     }
>
>     delete [] sortedIndex;
> }
>
> template <class KeyT, class DataT>
> SArrayIter<KeyT,DataT>::SArrayIter(const SArray<KeyT,DataT> &sarray,
> int (*keyCompare)(KeyT, KeyT))
>     : mySArrayBody(BODY(sarray.body)), current(0),
>       numEntries(sarray.numEntries()), sortFunction(keyCompare)
> {
>     /*
>      * Note: we access the underlying SArray through the body pointer,
>      * not the top-level SArray itself.  This allows the top-level object
>      * to be moved without affecting an ongoing iteration.
>      * XXX: This only works because
>      * - it is illegal to insert while iterating
>      * - deletions don't cause reallocation of the data
>      */
>     if (sortFunction && mySArrayBody) {
> sortKeys();
>     } else {
> sortedKeys = 0;
>     }
>
>     if (mySArrayBody) {
> mySArrayBody->deleted = 0;
>     }
> }
>
> /*
>  * This is the callback function passed to qsort for comparing array
>  * entries. It is passed the indices into the data array, which are
>  * compared according to the user-supplied function applied to the
>  * keys found at those locations.
>  */
> template <class KeyT, class DataT>
> int
> SArrayIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
> {
>   return (*(compFnType)SArray_thisKeyCompare)
> (BODY(SArray_thisBody)->data[*(unsigned *)idx1].key,
> BODY(SArray_thisBody)->data[*(unsigned *)idx2].key);
> }
>
> template <class KeyT, class DataT>
> SArrayIter<KeyT,DataT>::~SArrayIter()
> {
>     delete [] sortedKeys;
> }
>
> template <class KeyT, class DataT>
> void
> SArrayIter<KeyT,DataT>::init()
> {
>     delete [] sortedKeys;
>     sortedKeys = 0;
>
>     current = 0;
>
>     {
> /*
> * XXX: fake SArray object to access numEntries()
> */
> SArray<KeyT,DataT> mySArray(0);
>
> mySArray.body = mySArrayBody;
> numEntries = mySArray.numEntries();
> mySArray.body = 0;
>     }
>
>     if (mySArrayBody) {
> if (sortFunction) {
>    sortKeys();
> }
> mySArrayBody->deleted = 0;
>     }
> }
>
> template <class KeyT, class DataT>
> DataT *
> SArrayIter<KeyT,DataT>::next(KeyT &key)
> {
>     if (mySArrayBody == 0) {
> return 0;
>     } else {
> unsigned index;
>
> if (sortedKeys == 0) {
>    /*
>     * Detect deletion while iterating.
>     * A legal deletion can only affect the current entry, so
>     * adjust the current position to reflect the next entry was
>     * moved.
>     */
>    if (mySArrayBody->deleted) {
> numEntries --;
> current --;
>    }
>
>    if (current == numEntries) {
> return 0;
>    }
>
>    index = current++;
> } else {
>    if (current == numEntries) {
> return 0;
>    }
>
>    /*
>     * XXX: fake an SArray object to access locate()
>     */
>    SArray<KeyT,DataT> mySArray(0);
>
>    mySArray.body = mySArrayBody;
>    (void)mySArray.locate(sortedKeys[current++], index);
>    mySArray.body = 0;
> }
> mySArrayBody->deleted = 0;
>
> key = mySArrayBody->data[index].key;
> return &(mySArrayBody->data[index].value);
>     }
> }
>
> #undef BODY
>
> #endif /* _SArray_cc_ */

Click here to go to the SRILM home page.

 

About Us  Vertical divider  R&D Divisions  Divider  Careers  Divider  Newsroom  Divider  Contact Us
©2006 SRI International, 333 Ravenswood Avenue, Menlo Park, CA 94025-3493
SRI International is an independent, nonprofit corporation. Privacy policy

Last modified Dec 02, 2008