Tuesday, August 3, 2010

How Fast Skills Fade

I haven't touched code for over half a year..
The compiler has gathered so much dust I've almost forgotten what its interface looks like..

These past six months I've been practicing activities outside of programming,
and noticed that learning anything shares one trait:
for whatever is not innate, you have to practice until it reaches a certain critical mass
before it settles in and the rate at which it fades finally slows down.


And whether you are quick or slow,
the road to "complete understanding" is just as long....

Sunday, January 3, 2010

Modified k-Means

A simple improvement to k-Means. The code is not particularly well written.
#define ef else if


template <typename T>
inline void  set_vector (T* out, const T* in, int n)
{
    while (n-- > 0) out[n] = in[n]; 
}

template <typename T>
inline void  add_vector (T* out, const T* in, int n)
{
    while (n-- > 0) out[n] += in[n]; 
}

template <typename T>
inline void div_vector (T* v, int divisor, int n)
{
    while (n-- > 0) v[n] /= divisor; 
}

template <typename T>
inline double cal_distance (T* a, T* b, int n)
{
    double sum = 0, diff;
    while (n-- > 0) {
        diff = a[n] - b[n];
        sum += diff * diff;
    }
    return sqrt (sum);
}

template <typename T = float>
struct Kmeans
{
    int    nMaxIteration;           //maximum number of iterations
    int    cur_iter;                //current iteration count
    int    nData;                   //number of data items
    int*   label;                   //cluster label of each data item
    int*   ccount;                  //number of data items covered by each center
    T**    data;                    //pointer to the data set
    T**    center;                  //set of cluster centers 
    T*     radius;                  //hypersphere radius of each center      
    T*     new_center;              //scratch center used during updates 
    T*     mean;                    //geometric mean (centroid) of the data items 
    int    nCenter;                 //number of centers 
    int    dim;                     //data dimensionality
    double cluster_movement;        //largest center movement in the last update

    
    Kmeans() {
        srand ((unsigned int)time(0)); 
        label  = 0;
        ccount = 0;
        radius = 0;
        center = 0;
        mean   = 0; 
        new_center = 0;
    }
   ~Kmeans() {
        release();
    }


protected:

    void set (T* out, const T* in) {set_vector (out, in, dim);}
    void add (T* out, const T* in) {add_vector (out, in, dim);}
    void div (T* v, int divisor)  {div_vector (v, divisor, dim);}
    double distance (T* a, T* b)  {return cal_distance (a, b, dim);}

    void release()                          //free resources
    {
        if (label)      del_arr (label);      label  = 0;
        if (ccount)     del_arr (ccount);     ccount = 0;
        if (radius)     del_arr (radius);     radius = 0;
        if (center)     del_arr (center);     center = 0;
        if (mean)       del_arr (mean);       mean   = 0;
        if (new_center) del_arr (new_center); new_center = 0;
    }

    void init_centers ()
    {
        if (nCenter < 1)     ERROR_MSG_("number of centers must be >= 1");
        if (nCenter > nData) nCenter = nData; 

        release();                          //free old resources
        
        new_center = (T*) new_arr (sizeof(T), 1, dim);
        radius = (T*)   new_arr (sizeof(T),   1, nCenter);
        mean   = (T*)   new_arr (sizeof(T),   1, dim);
        center = (T**)  new_arr (sizeof(T),   2, nCenter, dim);
        ccount = (int*) new_arr (sizeof(int), 1, nCenter);
        label  = (int*) new_arr (sizeof(int), 1, nData);


        int i;

        for (i=0; i<nCenter; ++i)           //pick random data points as initial centers
            set (center[i], data [rand() % nData]);
        
        for (i=0; i<nData; ++i)             //compute the geometric mean of the data
            add (mean, data[i]);
        div (mean, nData);
        
    }

    void cluster_assignment ()              //assign each sample to its nearest center
    {
        int     label_id, j, k;
        double  min_dist, d;
        
        memset (ccount, 0, nCenter* sizeof(int));
        memset (radius, 0, nCenter* sizeof(T));


        for (j=0; j<nData; ++j) {
            label_id = 0;
            min_dist = distance (center[0], data[j]);
            for (k=1; k<nCenter; ++k) {
                d = distance (center[k], data[j]);
                if (d < min_dist) {
                    label_id = k;
                    min_dist = d;
                }
            }
            if (radius[label_id] < min_dist)
                radius[label_id] = min_dist;
            label[j] = label_id;            //assign to the nearest cluster
            ccount[label_id]++;             //count data items covered by this center 
        }
    }

    void update_cluster ()                  //re-estimate centroid positions
    {
        int j, k, total;
        double dist = 0;
        cluster_movement = 0;

        for (k=0; k<nCenter; ++k)           
        {
            memset (new_center, 0, dim* sizeof(T));
             total = 0;
            for (j=0; j<nData; ++j) 
                if (label[j] == k) {
                    add (new_center, data[j]);
                    total++;
                }      
            if (total > 0) {           
                div (new_center, total); 
                dist = distance (center[k], new_center);
                if (cluster_movement < dist) 
                    cluster_movement = dist;
                set (center[k], new_center);
            }
            else            
                set (center[k], mean);            
        }        
    }

public:

    //main algorithm flow: 
    //    nCenter = number of cluster centers, 
    //    data    = data set = data [nData_] [dim_] 

    void do_cluster 
        (int nCenter_, T** data_, int nData_, int dim_,
         int nIteration_,
         double movement_threshold)
    {
        setup (nCenter_, data_, nData_, dim_);
        nMaxIteration = nIteration_;

        cur_iter = 0;
        do {
            cluster_assignment ();          //assign samples to clusters            
            update_cluster ();              //re-estimate centroid positions
        }
        while (++cur_iter < nMaxIteration && 
               cluster_movement > movement_threshold);
    }


    //set up data and initialize the centers

    void setup (int nCenter_, T** data_, int nData_, int dim_)
    {
        nCenter  = nCenter_;                //number of cluster centers
        data     = data_;                   //pointer to the data set
        nData    = nData_;                  //number of data items
        dim      = dim_;                    //data dimensionality  
        cur_iter = 0;
        init_centers ();                    //initialize center positions
    }

    //run clustering one step at a time; returns the largest center movement

    double iterater_cluster ()
    {
        cur_iter++;
        cluster_assignment ();              //assign samples to clusters            
        update_cluster ();                  //re-estimate centroid positions
        return cluster_movement;            //largest center movement of this update
    }
};
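Before the modified version, a minimal usage sketch of the plain Kmeans struct above; it assumes the helper routines the code relies on (new_arr, del_arr, ERROR_MSG_) come from my own headers, plus <cstdio> for printf, and the tiny 2D data set is made up for the example:

void kmeans_demo ()
{
    const int nData = 6, dim = 2;

    //a made-up 2D data set with two obvious groups
    float raw[nData][dim] = {
        {1,1}, {2,1}, {1,2},        //group near (1,1)
        {9,9}, {8,9}, {9,8}         //group near (9,9)
    };
    float* rows[nData];
    for (int i=0; i<nData; ++i) rows[i] = raw[i];

    Kmeans<float> km;

    //batch mode: 2 centers, at most 100 iterations,
    //stop when the largest center movement drops below 1e-3
    km.do_cluster (2, rows, nData, dim, 100, 1e-3);

    for (int i=0; i<nData; ++i)
        printf ("data %d -> cluster %d\n", i, km.label[i]);

    //or step by step, via setup() + iterater_cluster()
    km.setup (2, rows, nData, dim);
    while (km.iterater_cluster() > 1e-3 && km.cur_iter < 100)
        ;                           //iterate until the movement is small enough
}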





//--------------------------------------------------------------
// Modify K-Means
//--------------------------------------------------------------

template <typename T = float>
struct ModifyKmeans: Kmeans<T>
{
    enum {MAX_SELECT = 32};                 //maximum number of candidate centers tried per round

    T**      best_center;                   //scratch set of centers (best so far) 
    T**      old_center;                    //scratch set of centers (previous round)
    T*       avg_radius;                    //average radius of each center
    bool*    bAlive;                        //liveness flags for the centers (used when merging)
    int      nMaxCenter;                    //maximum number of centers
    int      nMaxSelect;                    //maximum number of candidate centers per round
    int      nMinCount;                     //minimum number of data items each cluster must contain
    double   movement_threshold;            //threshold on center movement
    double   avg_dist;                      //average distance from each point to its center
    CvRNG    rng;                           //random number generator state


    ModifyKmeans() 
    {
        best_center = 0;
        old_center  = 0;
        avg_radius  = 0;
        bAlive      = 0;
        rng = cvRNG (cvGetTickCount());
    }
   ~ModifyKmeans() {release();}


    void release()                              //free resources
    {
        if (best_center) del_arr (best_center); 
        if (old_center)  del_arr (old_center);
        if (avg_radius)  del_arr (avg_radius); 
        if (bAlive)      del_arr (bAlive);   

        old_center  = 0;
        best_center = 0;
        avg_radius  = 0;
        bAlive      = 0;

        Kmeans<T>::release();
    }

    void setup (int nMaxCenter_, T** data_, int nData_, int dim_)
    {
        nMaxCenter = nMaxCenter_;               //maximum number of centers
        data       = data_;                     //pointer to the data set
        nData      = nData_;                    //number of data items
        dim        = dim_;                      //data dimensionality  
        cur_iter   = 0;
        nCenter    = 1;                         //number of centers in the current round
        nMaxSelect = MAX_SELECT;

        if (nMaxCenter < 1)     ERROR_MSG_("maximum number of centers must be >= 1");
        if (nMaxCenter > nData) nMaxCenter = nData; 
        if (nMaxSelect > nData) 
            ERROR_MSG3_("number of data items (%d) must be > maximum number of candidate centers (%d)",
                nData, nMaxSelect);

        release();                              //free old resources
        
        new_center  = (T*)   new_arr (sizeof(T),   1, dim);
        radius      = (T*)   new_arr (sizeof(T),   1, nMaxCenter);
        bAlive      = (bool*)new_arr (sizeof(bool),1, nMaxCenter);
        avg_radius  = (T*)   new_arr (sizeof(T),   1, nMaxCenter);
        best_center = (T**)  new_arr (sizeof(T),   2, nMaxCenter, dim);
        old_center  = (T**)  new_arr (sizeof(T),   2, nMaxCenter, dim);
        mean        = (T*)   new_arr (sizeof(T),   1, dim);
        center      = (T**)  new_arr (sizeof(T),   2, nMaxCenter, dim);
        ccount      = (int*) new_arr (sizeof(int), 1, nMaxCenter);
        label       = (int*) new_arr (sizeof(int), 1, nData);

        
        for (int i=0; i<nData; ++i) {           //compute the geometric mean of the data
            add (mean, data[i]);
            label[i] = 0;
        }
        div (mean, nData);
        set (center[0], mean);                  //the first center is the geometric mean

        shuffle (data, nData);                  //shuffle to break any ordering in the data
    }

    void cluster_assignment ()                  //assign each sample to its nearest center
    {
        int     label_id, i, j, k;
        double  min_dist, d;
        
        memset (ccount, 0, nCenter* sizeof(int));
        memset (radius, 0, nCenter* sizeof(T));

        
        for (j=0; j<nData; ++j) {
            label_id = 0;
            min_dist = distance (center[0], data[j]);
            for (k=1; k<nCenter; ++k) {
                d = distance (center[k], data[j]);
                if (d < min_dist) {
                    label_id = k;
                    min_dist = d;
                }
            }
            if (radius[label_id] < min_dist)    //track the maximum radius of each cluster  
                radius[label_id] = min_dist;
            label[j] = label_id;                //assign to the nearest cluster
            ccount[label_id]++;                 //count data items covered by this center 
        }
    }

    void update_cluster ()                      //re-estimate centroid positions
    {
        int j, k, total;
        double dist      = 0;
        cluster_movement = 0;        

        for (k=0; k<nCenter; ++k)           
        {
            memset (new_center, 0, dim* sizeof(T));
            total = 0;
            for (j=0; j<nData; ++j) 
                if (label[j] == k) {
                    add (new_center, data[j]);
                    total++;
                }      
            if (total > 0) 
            {
                div (new_center, total); 
                dist = distance (center[k], new_center);
                if (cluster_movement < dist) 
                    cluster_movement = dist;    //track the largest center movement
                set (center[k], new_center);
            }
            else 
                set (center[k], mean); 
        }    
    }

    void cal_avg_dist()                         //compute the average radius of each center
    {                                           //and the overall average radius
        int j, k, total;
        int nResetCenter = 0;
        double dist_sum  = 0;
        double cur_dist  = 0;                   //accumulates the average radius of every center

        for (k=0; k<nCenter; ++k)           
        {
            total    = 0;
            dist_sum = 0;                       //sum of distances from the points to this center
            for (j=0; j<nData; ++j) 
                if (label[j] == k) {
                    dist_sum += distance (center[k], data[j]);
                    total++;
                }      
            if (total > 0) 
            {
                dist_sum /= total;
                avg_radius[k] = dist_sum;       //record the average radius of this center
                cur_dist += dist_sum;           //accumulate the average radii
            }
            else nResetCenter++;            
        }                                       //average distance from a point to its center
        avg_dist = cur_dist / (nCenter - nResetCenter); 
    }

    void merge_center (int parent, int child)
    {
        
        memset (new_center, 0, dim* sizeof(T));
        int total = 0;
        for (int j=0; j<nData; ++j) 
            if (label[j] == parent || 
                label[j] == child) {
                add (new_center, data[j]);
                label[j] = parent;
                total++;
            }      
        if (total > 0) 
        {
            div (new_center, total);
            set (center[parent], new_center);
        }                    
    }

    void joint_clusters ()                      //merge nested clusters and strongly overlapping clusters
    {
        int i, k, j;
        double d;
        for (k=0; k<nCenter; ++k)
            bAlive[k] = true;

        for (k=0; k<nCenter; ++k)               //merge clusters contained inside another cluster
            if (bAlive[k]) 
                for (j=0; j<nCenter; ++j) 
                    if (bAlive[j] && k != j) {
                        d = distance (center[j], center[k]);
                        if (d + radius[j] < radius[k]) {
                            merge_center (k, j);
                            bAlive[j] = false;
                        }
                    }            
        for (j=k=0; k<nCenter; ++k)             //compact the center array
            if (bAlive[k]) {
                if (k != j) {
                    set (center[j], center[k]);
                    for (i=0; i<nData; ++i) 
                        if (label[i] == k)
                            label[i] = j;
                }
                j++;
            } 
        nCenter = j;
    }

    void cluster_reassignment ()
    {
        double min_dist, d;
        int label_id, k, j;
        for (j=0; j<nData; ++j) {
            label_id = 0;
            min_dist = distance (center[0], data[j]);
            min_dist -= avg_radius[0]; 
            for (k=1; k<nCenter; ++k) {
                d = distance (center[k], data[j]);
                d -= avg_radius[k];
                if (d < min_dist) {
                    label_id = k;
                    min_dist = d;
                }
            }
            if (radius[label_id] < min_dist)    //track the maximum radius of each cluster  
                radius[label_id] = min_dist;
            label[j] = label_id;                //assign to the nearest cluster
            ccount[label_id]++;                 //count data items covered by this center 
        }
    }


    bool kmeans (int k_centers)
    {
        nCenter  = k_centers;                   //number of centers for this run
        int iter = 0;
        do {
            cluster_assignment ();              //assign samples to clusters 
                                                //and compute each cluster's maximum radius
            if (ccount[nCenter-1] < nMinCount)  //if the newly added center covers too few points,
                return false;                   //discard it and try another candidate
            update_cluster ();                  //re-estimate centroid positions
            cal_avg_dist ();                    //compute the average radius of each center
        }
        while (++iter < nMaxIteration &&
               cluster_movement > movement_threshold);
        return true;
    }

public:

    void do_cluster 
        (int nMaxCenter_,                       //maximum number of centers
         T** data_, int nData_, int dim_,       //data set
         int    nMaxIteration_,                 //maximum number of iterations
         double mov_threshold_,                 //convergence threshold on center movement
         double min_cluster_ratio)              //each cluster must contain at least 
                                                //nData * ratio data items
    {
        setup (nMaxCenter_, data_, nData_, dim_);

        set (best_center[0], center[0]);

        nMinCount          = int (nData * min_cluster_ratio);
        nMaxIteration      = nMaxIteration_;    //maximum iterations of the inner k-means
        movement_threshold = mov_threshold_;

        cur_iter = 0;
        avg_dist = 0;

        double old_dist = (std::numeric_limits<double>::max)();
        int i, j, k; 
        int nCurCenter = 2;                     //current number of centers
                                                //starts at 2 because the geometric
                                                //mean already forms the first cluster
        do {
            double cost   = 0;

            for (j=0; j<nCurCenter-1; ++j)      //save the current best centers 
                set (old_center[j], center[j]); //into old_center

            for (i=0; i<nMaxSelect; ++i)        //try up to nMaxSelect candidates
            {
                cur_iter++;                     //count how many k-means runs were done  
                
                do {                            //keep retrying k-means 
                    j = nCurCenter - 1;         //until a valid new center is produced
                    k = cvRandInt (&rng) % nData;
                    set (center[j], data[k]); 
                } 
                while (!kmeans (nCurCenter));   //partition into nCurCenter clusters

                if (i == 0) 
                    cost = avg_dist;                    
                else                            //update the best set of centers?
                    if (avg_dist < cost) {     
                        cost = avg_dist;
                        for (j=0; j<nCurCenter; ++j)
                            set (best_center[j], center[j]);
                    }                
            }
            for (j=0; j<nCurCenter; ++j)        //copy the best centers back into center
                set (center[j], best_center[j]);//for the next round of k-means


            // termination test

            if (old_dist - avg_dist > 2)
                old_dist = avg_dist;
            else break;            
        }
        while (++nCurCenter <= nMaxCenter_);


        nCenter = nCurCenter - 1;
        for (j=0; j<nCenter; ++j)               //copy the best centers 
            set (center[j], old_center[j]);     //back into center

        cluster_assignment ();                  //assign the data to the final centers
        
        for (i=0; i<2; ++i) {                   //merge nearby clusters,
            joint_clusters ();                  //refining twice in total
            cal_avg_dist ();
            cluster_reassignment ();
        }
    }
};
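For completeness, a minimal sketch of how ModifyKmeans::do_cluster might be driven; the parameter values are made-up examples, and nData is assumed to be larger than MAX_SELECT (32), which setup() requires:

void modify_kmeans_demo (float** data, int nData, int dim)
{
    ModifyKmeans<float> mk;

    //up to 16 centers, 50 inner k-means iterations,
    //stop a k-means run once the largest center movement < 0.5,
    //and require each cluster to hold at least 1% of the data
    mk.do_cluster (16, data, nData, dim, 50, 0.5, 0.01);

    printf ("found %d clusters after %d k-means runs\n",
            mk.nCenter, mk.cur_iter);
}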


The test framework below generates clusters from a 2D GMM and compares the algorithms:
template <class T>
struct KMeanTest
{
    enum {CD = 3, 
          MAX_CLUSTERS = (CD+1)*(CD+1)*(CD+1),  //maximum number of centers
          W=500, H=500};                        //width and height of the display image

    CvScalar    color_tbl[MAX_CLUSTERS];
    IplImage*   img [ALGO_NUM];             //2D display images

    T** data;                               //data set
    int nCenter;                            //number of centers
    int nData;                              //number of data items
    int dim;                                //data dimensionality

    void generate_color_table()
    {
        int r, g, b, i=0, d = 255/CD;
        for (r=0; r<256; r+=d)
            for (g=0; g<256; g+=d)
                for (b=0; b<256; b+=d)
                    color_tbl[i++] = CV_RGB (r,g,b);
        shuffle (color_tbl+1, MAX_CLUSTERS-2);
    }


    //generate sample points from nCluster Gaussian (normal) distributions
    //if nCluster = -1, generate the samples uniformly at random
    //if nCluster = 0,  pick nCluster at random from 1 ~ MAX_CLUSTERS

    void generate_sample (int nCluster = -1)
    {      
        release();

        double  sx, sy;
        int     cx, cy;
        int     i = 0;

        data = (T**) new_arr (sizeof(T), 2, nData, dim);

        if (nCluster == -1)                   //if the number of clusters is -1,  
            for (int i=0; i<nData; ++i) {    //generate the samples uniformly at random
                data[i][0] = T (rand() % W);
                data[i][1] = T (rand() % H);
            }
        else 
        {            
            if (nCluster == 0)
                nCluster = rand() % MAX_CLUSTERS + 1;

            CvRNG  rng    = cvRNG (cvGetTickCount());
            CvMat* points = cvCreateMat (nData, 1, CV_32SC2);
  
            for (int k = 0; k < nCluster; ++k)  //generate random sample  
            {                                   //from multi-gaussian 
                                                //distribution
                const int B = 80;               //distance of the cluster center from the border
                cx = B + rand() % (W-2*B); 
                cy = B + rand() % (H-2*B);
                sx = W * ((0.01 + cvRandReal (&rng))/ 10.0);
                sy = H * ((0.01 + cvRandReal (&rng))/ 10.0);


                //cluster size A = (nData / nCluster) * (1 +- 0.1)

                double ratio = 1 + (cvRandReal (&rng) - 0.5)/5;
                points->rows = int ((nData / nCluster) * ratio);
                
                //for the last cluster, assign it all the remaining slots

                if (k == nCluster-1 && points->rows < nData-i)
                    points->rows = nData - i;

                cvRandArr (&rng, points, CV_RAND_NORMAL,
                           cvScalar (cx, cy),       //mean
                           cvScalar (sx, sy));      //standard deviation

                for (int j=0; j<points->rows; ++j) {
                    data[i][0] = (T) points->data.i[j*2];
                    data[i][1] = (T) points->data.i[j*2+ 1];
                    i++;
                    if (i >= nData) break;
                }
                if (i >= nData) break;
            }

            cvReleaseMat( &points );            
        }
    }
};


Comparison figures:

3 - GM:


6 - GM:


Sunday, December 27, 2009

Cycle Search



#include <conio.h>
#include <cstdarg>
#include <cstdio>
#include "global.h"


#define _STACK_CHECK_BOUNDARY_
template <class T, int N=512>
class Stack
{
    T& err (int n) {
        static char* s[] = {
            "Overflow","Underflow",
            "Out of range","Empty"
        };
        puts (s[n]); getch(); 
        return d[0];
    }
public:
    T d[N];                //data items
    int n;                 //number of elements  
    Stack()      { reset(); } 
    void reset() { n = 0; }

#ifdef _STACK_CHECK_BOUNDARY_
    void push (T x)       { if (n<N) d[n++] = x; else err(0);}
    T pop()               { return n? d[--n]: err(1); }
    T& operator*()        { return n>0? d[n-1]: err(3); }
    T& operator[] (int i) { return 0<=i&&i<n? d[i]: err(2); }
#else
    void push (T x)       { d[n++] = x; }
    T pop()               { return n? d[--n]: *d; }
    T& operator*()        { return d[n-1]; }
    T& operator[] (int i) { return d[i]; }
#endif
};


//------------------------------------------------------------
// Usage:
//
//   1. Call reset() first
//   2. Allocate vertices 0~N in order with add_vertex
//   3. Add the current vertex's outgoing neighbors with add_edge
//   4. Repeat steps 2 and 3 until every vertex is set up
//
//------------------------------------------------------------

struct Graph                        //graph structure
{
    enum {
        MAX_V  = 256,               //maximum number of vertices
        MAX_ES = 4096               //capacity of the edge stack
    };
    struct Vertex {
        int* edge;                  //adjacent vertices
        int  nEdge;                 //number of outgoing edges
    };
    
    Vertex  V [MAX_V];              //vertex stack
    int     nV;                     //current number of vertices
    int     curV;                   //index of the vertex currently being set up
    int     ES[MAX_ES];             //edge stack
    int     nES;                    //current number of edges

    void reset ()
    {
        nV = nES = 0;
    }

    void add_vertex (int id)        //id is the vertex to set up
    {
        curV = id;
        if (id != nV) 
            ERROR_MSG3_("vertex setup error: %d should be %d", id, nV);
        V[curV].edge  = &ES[nES];
        V[curV].nEdge = 0;
        nV++;
    }

    void add_edge (int id)          //id is the vertex this edge points to
    {
        V[curV].nEdge++;
        ES[nES++] = id;
    }

    void add_edges (int v, int n, ...)  //v = vertex, n = number of edges that follow
    {
        add_vertex (v);
        va_list p;
        va_start (p, n);
        while (n-- > 0) 
            add_edge (va_arg (p, int));
        va_end (p);
    }

    void print_edges ()
    {
        int i, j;
        for (i=0; i<nV; ++i) {
            printf ("\n %2d: ", i); 
            for (j=0; j<V[i].nEdge; ++j)
                printf ("%2d ", V[i].edge[j]); 
        }
    }
};



//------------------------------------------------------------
// Cycle search: 
//
//     uses Tarjan's algorithm to find strongly connected components
//     and skips solutions with only 1 or 2 elements 
//
//------------------------------------------------------------

class FindLoops                         //cycle search algorithm
{
    typedef Stack<int,Graph::MAX_ES> TStack; 

    Graph::Vertex* V; 
    TStack  stack;                      //holds the vertices currently being traversed
    int     visit    [Graph::MAX_V];    //DFS visit order of each vertex
    bool    bInStack [Graph::MAX_V];    //marks whether a vertex is on the stack
    int     order;                      //current visit order

    void tarjan (int v)                 //v is the starting vertex
    {
        static int cur;
        int  min = visit[v] = order++;  //min holds the visit order of the earliest
                                        //vertex reachable from v
        int  t;
        int  n = V[v].nEdge;
        int* e = V[v].edge;
        
        stack.push (v);
        bInStack[v] = true;

        while (n-- > 0) {
            t = *e++;                   //fetch the neighbor's index
            if (visit[t] == -1)         //visit the neighbor via DFS
                tarjan (t);
            else cur = visit[t];
            if (bInStack[t] && cur < min) 
                min = cur;
        }
        if (visit[v] == min) {
            puts ("");
            do printf ("%2d ", t=stack.pop()),
               bInStack[t] = false;
            while (t != v);
        }
    }
public:

    void operator() (Graph& graph)
    {   
        stack.reset();
        V = graph.V;
        order = 0;                //-1 = not yet visited
        memset (visit, -1, sizeof visit);
        memset (bInStack, 0, sizeof bInStack);
        for (int i=0; i<graph.nV; ++i)
            if (visit[i] < 0)                 
                tarjan (i);            
    }
};
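A minimal usage sketch of the two classes above, following the setup steps listed before Graph; the small 6-vertex digraph is made up for illustration:

int main ()
{
    Graph g;
    g.reset();

    //a made-up digraph with two cycles:
    //0 -> 1 -> 2 -> 0   and   3 -> 4 -> 5 -> 3
    g.add_edges (0, 1, 1);          //vertex 0 has 1 edge, to vertex 1
    g.add_edges (1, 1, 2);
    g.add_edges (2, 1, 0);
    g.add_edges (3, 1, 4);
    g.add_edges (4, 1, 5);
    g.add_edges (5, 1, 3);

    g.print_edges ();

    FindLoops find_loops;
    find_loops (g);                 //prints each strongly connected component

    return 0;
}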

Saturday, December 26, 2009

Prefix/Infix/Postfix Expressions - 2

 
The program below shows that when there are many operators,
a stack-based reverse Polish approach works out better than
plain recursive descent; otherwise code that fits in about 60
lines turns into several hundred...

#include <iostream>
#include <cstdio>
#include <ctype.h>
#include <conio.h>
using namespace std;

template <class T, int N=512>
class Stack
{
    T err (int n)
    {
        static char* s[] = {
            "Overflow","Underflow",
            "Out of range","Empty"
        };
        puts (s[n]); getch(); 
        return d[0];
    }
public:
    T d[N];                //data items
    int n;                 //number of elements  
    Stack()               { n = 0; }
    void push (T x)       { if (n<N) d[n++] = x; else err(0);}
    T pop()               { return n? d[--n]: err(1); }
    T& operator*()        { return n>0? d[n-1]: err(3); }
    T& operator[] (int i) { return 0<=i&&i<n? d[i]: err(2); }
};
class Expression 
{ 
    char *s, c, *p, buf[512]; 

    void next () { c = *s++; } 
    void put (char c1, char c2=0) { *p++ = c1; if (c2 != 0) *p++ = c2; } 

    //recursive-descent levels; each emits postfix output through put()
    //("or_" / "and_" are used because "or" and "and" are reserved words in C++)

    int expr () { 
        if (!or_()) return 0; 
        while (c=='|') { 
            next(); if (c != '|') return 0; next(); 
            if (!or_()) return 0; 
            put ('|','|'); 
        } 
        return 1; 
    } 
    int or_ () { 
        if (!and_()) return 0; 
        while (c=='&') { 
            next(); if (c != '&') return 0; next(); 
            if (!and_()) return 0; 
            put ('&','&'); 
        } 
        return 1; 
    } 
    int and_ () { 
        if (!compar()) return 0; 
        char op[3] = {0,0,0}; 
        while (c=='<' || c=='>') { 
            op[0] = c; next(); 
            if (c == '=') { op[1]='='; next(); } 
            if (!compar()) return 0; 
            put (op[0], op[1]); 
        } 
        return 1; 
    } 
    int compar () { 
        if (!term()) return 0; 
        for (char op; c=='+' || c=='-';) { 
            op = c; next(); 
            if (!term()) return 0; 
            put (op); 
        } 
        return 1; 
    } 
    int term () { 
        if (!fact()) return 0; 
        for (char op; c=='*' || c=='/';) { 
            op = c; next(); 
            if (!fact()) return 0; 
            put (op); 
        } 
        return 1; 
    } 
    int fact () { 
        if (!expo()) return 0; 
        while (c=='^') { 
            next(); 
            if (!expo()) return 0; 
            put ('^'); 
        } 
        return 1; 
    } 
    int expo () { 
        if (c == '!') { 
            next(); 
            if (!invers()) return 0; 
            put ('!'); 
        } 
        else if (!invers()) return 0; 
        return 1; 
    } 
    int invers () { 
        if (isalpha(c)) { put (c); next(); return 1; } 
        else if (c=='(') { 
            next(); 
            if (expr() && c == ')') { next(); return 1; } 
        } 
        return 0; 
    } 

    //--------------------------------- 

    struct Node { 
        char c[3]; 
        Node *L, *R; 
        Node () { c[1]=c[2]=0; L=R=0; } 
        Node (char c1, char c2=0, Node* l=0, Node* r=0) { c[0]=c1; c[1]=c2; c[2]=0; L=l; R=r; } 
        Node (char c1, Node* l, Node* r) { c[0]=c1; c[1]=0; L=l; R=r; } 
    }; 

    Node* build_exptree (char* postfix)     //rebuild an expression tree from postfix
    { 
        Stack <Node*> s; 
        Node *L, *R; 
        char c1, c2, *p = postfix; 
        while (*p) { 
            if (isalpha(*p) || isdigit(*p)) { 
                s.push (new Node(*p)); 
            } 
            else { 
                c1 = *p; c2 = 0; L = 0; R = s.pop(); 
                if (c1 != '!') L = s.pop();     //'!' is unary, everything else is binary
                if (c1=='&' || c1=='|') c2 = *++p; 
                else if ((c1=='<' || c1=='>') && p[1]=='=') c2 = *++p; 
                s.push (new Node (c1, c2, L, R)); 
            } 
            p++; 
        } 
        return s.pop(); 
    } 
    void preorder (Node *x) { if (x != NULL) { cout << x->c; preorder (x->L); preorder (x->R); } } 
    void inorder  (Node *x) { if (x != NULL) { inorder (x->L); cout << x->c; inorder (x->R); } } 
    void postorder (char* exp) { p = buf; s = exp; next(); expr(); put(0); } 

    //----------------------------------- 
public: 
    void postfix (char* exp) { postorder (exp); cout << buf; } 
    void prefix  (char* exp) { postorder (exp); preorder (build_exptree (buf)); } 
}; 

int main (/* daviddr */) 
{ 
    char* s[] = { 
        "(a-b*(c+d)^e)&&((f+g^h)-i*j+k/l)", 
        "!((a^b-c/d+e/f))||(g-(h^i+j*k)/l+m)", 
        "(a-b/c)*d^f<=(g+h*i)/j+k^l-m" 
    }; 
    Expression exp; 
    for (int i=0; i<3; i++) { 
        puts(""); exp.postfix (s[i]); 
        puts(""); exp.prefix  (s[i]); 
    } 
    return 0; 
}

Friday, December 25, 2009

Directed Adjacency Graph Data Structures

Let the total number of vertices be V,
and the sum of the adjacent-vertex counts over all vertices be N.

Common structures include:

1. Adjacency Matrix

    space used: V * V 

2. Adjacency List:
    records out edges
    if the total size of the list pointers is N,
    space used: V * (2*(1+N))

3. Adjacency Multilists:
    uses 2 lists to record out edges and in edges
    space used: 2 * V * (2*(1+N))

4. Sequential Representation (Index Table):
    very economical in both space and time when there are many vertices but few edges
    space used: V + (V+N)
    the leading V is the size of the index table
    (a sketch follows this list)
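
A minimal sketch of representation 4, under the common convention that an index array of V+1 offsets points into a flat edge array; the small digraph is made up for the example:

#include <cstdio>

int main ()
{
    //a made-up digraph with V = 4 vertices and N = 5 edges:
    //0 -> 1, 0 -> 2, 1 -> 2, 2 -> 3, 3 -> 0
    const int V = 4, N = 5;

    //index[v]..index[v+1]-1 are the positions of v's out edges in edge[]
    int index[V + 1] = {0, 2, 3, 4, 5};
    int edge [N]     = {1, 2, 2, 3, 0};

    for (int v = 0; v < V; ++v) {
        printf ("%d:", v);
        for (int e = index[v]; e < index[v + 1]; ++e)
            printf (" %d", edge[e]);
        printf ("\n");
    }
    return 0;
}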

Thursday, December 24, 2009

kd-trees in High Dimensions

 
http://www.codeproject.com/KB/architecture/KDTree.aspx

Anton Milev writes in the article:

For small dimensions the KD-tree can be much faster than sequential search; however, for 10-dimensional space the fraction of checked nodes rises to about 25%. It is possible to reduce this number 2-3 times if the tree is balanced, but the tendency of the checked-node count to rise exponentially makes the KD-tree impractical above 15 dimensions. The second curve shows how the KD / Brute ratio changes with the space dimension; Brute stands for sequential search, which checks all nodes. Practically, for 10000 points, the KD-tree becomes only as fast as sequential search for dimensions bigger than 10.

With the analyses made in this article I showed that the standard KD-tree is not good for dimensions beyond 10. In my next article about KD-trees and nearest-neighbour search in N dimensions I will introduce an extension of the quad tree for N-dimensional data, the QD-tree; it is faster than the KD-tree and allows run-time balancing. However, for big dimensions it again becomes slow. This problem is well known in computational geometry, and it would be so for any data structure based only on a tree. On the other hand, it is not possible to make a multidimensional linked list and include it in the tree. The problem is quite complicated and requires new kinds of data structures.