[FAQ]Sinusprobleme

Werbeanzeige

Anonymous

unregistriert

21.02.2006, 22:04

FAQ gefährdet

Zum Seitenanfang

CW_Kovok

Alter Hase

Beiträge: 836

Wohnort: nähe Bonn

Beruf: Schüler

25.02.2006, 10:57

Also jetzt ist das so:

C-/C++-Quelltext

#include <iostream>
#include <ctime>
#include <cmath>

using namespace std;

// NOTE(review): atan(1.0) evaluates to pi/4, not pi (pi would be 4.0*atan(1.0)),
// so every loop in this listing that runs while "i < pi" actually covers
// [0, pi/4). The double result is narrowed to float on initialization.
const float pi  = atan(1.0);

// Cheap single-precision sine approximation:
//   sin(x) ~ x * (1 - 1.6605e-01*x^2 + 7.61e-03*x^4)
// fAngle: angle in radians (presumably meant for small, non-negative angles;
// the benchmark only feeds values below pi/2 -- TODO confirm valid range).
// Returns the approximated sine.
inline float FastSin0(const float& fAngle)
{
    const float squared = fAngle * fAngle;

    // Horner evaluation in x^2, highest coefficient first. Each step is kept
    // as its own statement so the rounding sequence stays the same.
    float poly = 7.61e-03f;
    poly = poly * squared;
    poly = poly - 1.6605e-01f;
    poly = poly * squared;
    poly = poly + 1.0f;

    // Final multiplication by x turns the even polynomial into the odd one.
    poly = poly * fAngle;
    return poly;
}

// Higher-order single-precision sine approximation:
//   sin(x) ~ x * (1 - 1.666666664e-01 x^2 + 8.3333315e-03 x^4
//                   - 1.98409e-04 x^6 + 2.7526e-06 x^8 - 2.39e-08 x^10)
// fAngle: angle in radians. Returns the approximated sine.
inline float FastSin1(const float& fAngle)
{
    const float squared = fAngle * fAngle;

    // Horner evaluation in x^2, starting with the x^10 coefficient.
    // (The leading coefficient is a double literal narrowed to float.)
    float poly = -2.39e-08;
    poly = poly * squared;
    poly = poly + 2.7526e-06f;
    poly = poly * squared;
    poly = poly - 1.98409e-04f;
    poly = poly * squared;
    poly = poly + 8.3333315e-03f;
    poly = poly * squared;
    poly = poly - 1.666666664e-01f;
    poly = poly * squared;
    poly = poly + 1.0f;

    // Multiply by x to obtain the odd polynomial.
    poly = poly * fAngle;
    return poly;
}

// Sine via the truncated Taylor series
//   x - x^3/3! + x^5/5! - x^7/7! + x^9/9!
// x: angle in radians. Returns the series value in single precision.
inline float TnSin(float x)
{
    // Odd powers of x, each built from the previous one.
    const float sq = x * x;
    const float p3 = x * sq;
    const float p5 = p3 * sq;
    const float p7 = p5 * sq;
    const float p9 = p7 * sq;

    // Accumulate the alternating terms, lowest order first.
    float sum = x;
    sum = sum - 1.0f / 6.0f * p3;
    sum = sum + 1.0f / 120.0f * p5;
    sum = sum - 1.0f / 5040.0f * p7;
    sum = sum + 1.0f / 362880.0f * p9;
    return sum;
}

// Benchmark driver for the sine approximations in this listing.
//
// Part 1: for each of FastSin0, FastSin1, sinf, sin and TnSin, sums the
// function over i = 0, 0.0001, ... while i < pi (the file-level constant),
// repeated 1000 times, and measures the elapsed clock() ticks. The sums are
// printed so the loops have an observable result.
// Part 2: prints each elapsed time in seconds and sinf's time relative to it
// in percent.
// Part 3: for TnSin, FastSin0, FastSin1 and sinf, prints the largest value of
// sin(double(i)) - f(i) seen over the same range.
int main(void)
{
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long start, ende;
    float a1, a2, a3, a4, a5, t = 0.0f;

    // --- timing: FastSin0 ---
    start = clock();
    
    for(int j=0; j<1e3; j++)
        for(float i=0.0f; i<pi; i=i+0.0001f)
            t += FastSin0(i);

    ende = clock();

    a1 = float(ende-start);

    cout<<t<<endl;
    cout<<"FastSin0 fertig"<<endl;

    // --- timing: FastSin1 ---
    t = 0.0f;
    start = clock();
    
    for(int j=0; j<1e3; j++)
        for(float i=0.0f; i<pi; i=i+0.0001f)
            t += FastSin1(i);

    ende = clock();

    a2 = float(ende-start);

    cout<<t<<endl;
    cout<<"FastSin1 fertig"<<endl;

    // --- timing: sinf (used as the 100% reference below) ---
    t = 0.0f;
    start = clock();
    
    for(int j=0; j<1e3; j++)
        for(float i=0.0f; i<pi; i=i+0.0001f)
            t += sinf(i);

    ende = clock();

    a3 = float(ende-start);

    cout<<t<<endl;
    cout<<"sinf fertig"<<endl;

    // --- timing: sin ---
    t = 0.0f;
    start = clock();
    
    for(int j=0; j<1e3; j++)
        for(float i=0.0f; i<pi; i=i+0.0001f)
            t += sin(i);

    ende = clock();

    a4 = float(ende-start);

    cout<<t<<endl;
    cout<<"sin fertig"<<endl;

    // --- timing: TnSin ---
    t = 0.0f;
    start = clock();
    
    for(int j=0; j<1e3; j++)
        for(float i=0.0f; i<pi; i=i+0.0001f)
            t += TnSin(i);

    ende = clock();

    a5 = float(ende-start);

    cout<<t<<endl;
    cout<<"TnSin fertig"<<endl;

    // Elapsed seconds and speed relative to sinf (a3 / aN * 100).
    // NOTE(review): if a measured tick count is 0 the ratio divides by zero.
    cout<<"sinf: "<<a3/CLOCKS_PER_SEC<<" entspricht: "<<a3/a3*100<<"%"<<endl;
    cout<<"sin: "<<a4/CLOCKS_PER_SEC<<" entspricht: "<<a3/a4*100<<"%"<<endl;
    cout<<"FastSin0: "<<a1/CLOCKS_PER_SEC<<" entspricht: "<<a3/a1*100<<"%"<<endl;
    cout<<"FastSin1: "<<a2/CLOCKS_PER_SEC<<" entspricht: "<<a3/a2*100<<"%"<<endl;
    cout<<"TnSin: "<<a5/CLOCKS_PER_SEC<<" entspricht: "<<a3/a5*100<<"%"<<endl;

    // --- accuracy ---
    // NOTE(review): each loop below keeps the maximum of the *signed*
    // difference (no absolute value is taken), so an approximation that is
    // never below the reference reports 0 even when it is inaccurate.
    float p = 0.0f;

    for(float i=0.0f; i<pi; i+= 0.0001f)
    {
        float j = sin(double(i)) - TnSin(i);
        if(p<j)
            p = j;
    }

    // "scientific" stays in effect for all following output.
    cout<<scientific<<"Genauigkeit TnSin: "<<p<<endl;
   
    p = 0.0f;

    for(float i=0.0f; i<pi; i+= 0.0001f)
    {
        float j = sin(double(i)) - FastSin0(i);
        if(p<j)
            p = j;
    }

    cout<<"Genauigkeit FastSin0: "<<p<<endl;

    p = 0.0f;

    for(float i=0.0f; i<pi; i+= 0.0001f)
    {
        float j = sin(double(i)) - FastSin1(i);
        if(p<j)
            p = j;
    }

    cout<<"Genauigkeit FastSin1: "<<p<<endl;

    p = 0.0f;

    for(float i=0.0f; i<pi; i+= 0.0001f)
    {
        float j = sin(double(i)) - sinf(i);
        if(p<j)
            p = j;
    }

    cout<<"Genauigkeit sinf: "<<p<<endl;

    return 0;
}

Dann erhalte ich als Ergebnis:

Zitat

2.92468e+006
FastSin0 fertig
2.92422e+006
FastSin1 fertig
2.92422e+006
sinf fertig
2.92422e+006
sin fertig
2.92422e+006
TnSin fertig
sinf: 0.812 entspricht: 100%
sin: 1.063 entspricht: 76.3876%
FastSin0: 0.656 entspricht: 123.78%
FastSin1: 1 entspricht: 81.2%
TnSin: 0.672 entspricht: 120.833%
Genauigkeit TnSin: 1.047701e-007
Genauigkeit FastSin0: 2.530216e-010
Genauigkeit FastSin1: 5.518797e-008
Genauigkeit sinf: 2.979680e-008
Drücken Sie eine beliebige Taste . . .

Immer noch keine vier- bis achtfache Geschwindigkeit, wieso?

Was es alles gibt, das ich nich brauche - Aristoteles

Zum Seitenanfang

David Scherfgen

Administrator

Beiträge: 10 382

Wohnort: Hildesheim

Beruf: Wissenschaftlicher Mitarbeiter

25.02.2006, 13:24

Und das hier erhalte ich:

Quellcode

2.92893e+06
FastSin0 fertig
2.92859e+06
FastSin1 fertig
2.92859e+06
sinf fertig
2.92859e+06
sin fertig
2.92859e+06
TnSin fertig
sinf: 0.468 entspricht: 100%
sin: 0.469 entspricht: 99.7868%
FastSin0: 0.046 entspricht: 1017.39%
FastSin1: 0.079 entspricht: 592.405%
TnSin: 0.109 entspricht: 429.358%
Genauigkeit TnSin: 1.142471e-09
Genauigkeit FastSin0: 0.000000e+00
Genauigkeit FastSin1: 2.433674e-09
Genauigkeit sinf: 0.000000e+00

Seltsam ist die Genauigkeit von FastSin0!
Ich habe übrigens einen Athlon64 3000+.

David Scherfgens Website | Konzentrationstest Polizei

Zum Seitenanfang

CW_Kovok

Alter Hase

Beiträge: 836

Wohnort: nähe Bonn

Beruf: Schüler

25.02.2006, 13:32

Habe ich auch. Welchen Compiler benutzt du? Hier das Ganze mal im Release-Modus:

Zitat

2.92468e+006
FastSin0 fertig
2.92422e+006
FastSin1 fertig
2.92422e+006
sinf fertig
2.92422e+006
sin fertig
2.92422e+006
TnSin fertig
sinf: 0.438 entspricht: 100%
sin: 0.453 entspricht: 96.6887%
FastSin0: 0.219 entspricht: 200%
FastSin1: 0.516 entspricht: 84.8837%
TnSin: 0.328 entspricht: 133.537%
Genauigkeit TnSin: 1.047701e-007
Genauigkeit FastSin0: 2.530216e-010
Genauigkeit FastSin1: 5.518797e-008
Genauigkeit sinf: 2.979680e-008
Drücken Sie eine beliebige Taste . . .

Was es alles gibt, das ich nich brauche - Aristoteles

Zum Seitenanfang

David Scherfgen

Administrator

Beiträge: 10 382

Wohnort: Hildesheim

Beruf: Wissenschaftlicher Mitarbeiter

25.02.2006, 13:45

Visual C++.NET 2003, Release-Mode, nur Standard-Optimierungen

David Scherfgens Website | Konzentrationstest Polizei

Zum Seitenanfang

CW_Kovok

Alter Hase

Beiträge: 836

Wohnort: nähe Bonn

Beruf: Schüler

25.02.2006, 13:48

Komisch, ich benutze Visual C++ Express 2005; der müsste doch ähnlich gute Ergebnisse erzielen, oder?

//edit:
fmodf ist ein Speedkiller, oder?

Was es alles gibt, das ich nich brauche - Aristoteles

Zum Seitenanfang

CW_Kovok

Alter Hase

Beiträge: 836

Wohnort: nähe Bonn

Beruf: Schüler

25.02.2006, 20:44

Ich habe in den Optionen unter Project -> Properties -> C/C++ -> Code Generation die Option "Floating Point Model" auf "Fast" gestellt, mit dem Ergebnis:

Zitat

2.92893e+006
FastSin0 fertig
2.92859e+006
FastSin1 fertig
2.92859e+006
sinf fertig
2.92859e+006
sin fertig
2.92859e+006
TnSin fertig
sinf: 0.468 entspricht: 100%
sin: 0.453 entspricht: 103.311%
FastSin0: 0.046 entspricht: 1017.39%
FastSin1: 0.079 entspricht: 592.405%
TnSin: 0.094 entspricht: 497.872%
Genauigkeit TnSin: 1.142471e-009
Genauigkeit FastSin0: 0.000000e+000
Genauigkeit FastSin1: 2.433674e-009
Genauigkeit sinf: 0.000000e+000
Drücken Sie eine beliebige Taste . . .

Also deinen sehr ähnliche Ergebnisse. Jetzt also die Frage: Welche Einstellungen entscheiden über die Geschwindigkeit? Außer der einen gibt es da noch viele andere, die alle irgendwie die Geschwindigkeit beeinflussen. Wie bekommt man es denn am schnellsten?

Was es alles gibt, das ich nich brauche - Aristoteles

Zum Seitenanfang

CW_Kovok

Alter Hase

Beiträge: 836

Wohnort: nähe Bonn

Beruf: Schüler

26.02.2006, 12:22

So, jetzt mal die volle Version mit allen Optimierungen, auch der oben schon genannten Fast-Einstellung unter Code Generation (nur ohne Assembler):

trigfunc.h:

C-/C++-Quelltext

#include <cmath>

// Circle constants derived from atan(1) == pi/4.
// Single-precision set: computed in double, then narrowed to float.
const float CE_PI              = static_cast<float>(atan(1.0)*4.0);
const float CE_HALF_PI         = static_cast<float>(atan(1.0)*2.0);
const float CE_QUATER_PI       = static_cast<float>(atan(1.0));
const float CE_TWO_PI          = static_cast<float>(atan(1.0)*8.0);

// Double-precision set of the same constants.
const double CE_DPI            = atan(1.0)*4.0;
const double CE_DHALF_PI       = atan(1.0)*2.0;
const double CE_DQUATER_PI     = atan(1.0);
const double CE_DTWO_PI        = atan(1.0)*8.0;

//some very fast sin and asin functions

// Low-order sine approximation, generic over the floating-point type T:
//   sin(x) ~ x * (1 - 1.6605e-01*x^2 + 7.61e-03*x^4)
// tAngle: angle in radians. Returns the approximated sine as T.
template<typename T> inline T FastSin0(const T& tAngle)
{
    const T tSquared = tAngle * tAngle;

    // Horner evaluation in x^2; coefficients are double literals converted to T.
    T tPoly = static_cast<T>(7.61e-03);
    tPoly = tPoly * tSquared;
    tPoly = tPoly - static_cast<T>(1.6605e-01);
    tPoly = tPoly * tSquared;
    tPoly = tPoly + static_cast<T>(1.0);

    // Multiply by x to obtain the odd polynomial.
    tPoly = tPoly * tAngle;
    return tPoly;
}

// Higher-order sine approximation, generic over the floating-point type T:
//   sin(x) ~ x * (1 - 1.666666664e-01 x^2 + 8.3333315e-03 x^4
//                   - 1.98409e-04 x^6 + 2.7526e-06 x^8 - 2.39e-08 x^10)
// evaluated with Horner's scheme in x^2.
//
// All coefficients are written as double literals and converted to T. The
// earlier float-suffixed literals rounded every coefficient to float before
// the conversion, which limited FastSin1<double> to roughly float accuracy.
//
// tAngle: angle in radians. NOTE(review): the coefficient set looks like the
// classic 0..pi/2 polynomial; accuracy outside that range is not established
// here -- confirm before using it with unreduced angles.
// Returns the approximated sine as T.
template<typename T> inline T FastSin1(const T& tAngle)
{
    T tASqr = tAngle*tAngle;
    T tResult = static_cast<T>(-2.39e-08);

    tResult *= tASqr;
    tResult += static_cast<T>(2.7526e-06);
    tResult *= tASqr;
    tResult -= static_cast<T>(1.98409e-04);
    tResult *= tASqr;
    tResult += static_cast<T>(8.3333315e-03);
    tResult *= tASqr;
    tResult -= static_cast<T>(1.666666664e-01);
    tResult *= tASqr;
    tResult += static_cast<T>(1.0);
    // Multiply by x to obtain the odd polynomial.
    tResult *= tAngle;

    return tResult;
}

// Sine via the truncated Taylor series, generic over the floating-point type T:
//   x - x^3/3! + x^5/5! - x^7/7! + x^9/9!
//
// The reciprocal factorials are formed in T. The earlier float expressions
// (1.0f / 6.0f, ...) were rounded to float even for T = double, which added a
// coefficient error of about 5e-9 * x^3 to the double instantiation.
//
// tAngle: angle in radians. Returns the series value as T.
template<typename T> inline T FastSinTn(const T& tAngle)
{
    T tResult = tAngle;

    // Odd powers of x, each built from the previous one.
    T t2 = tAngle * tAngle;
    T t3 = tAngle * t2;
    T t5 = t3 * t2;
    T t7 = t5 * t2;
    T t9 = t7 * t2;

    tResult -= static_cast<T>(1) / static_cast<T>(6) * t3;
    tResult += static_cast<T>(1) / static_cast<T>(120) * t5;
    tResult -= static_cast<T>(1) / static_cast<T>(5040) * t7;
    tResult += static_cast<T>(1) / static_cast<T>(362880) * t9;

    return tResult;
}

// Arcsine approximation, generic over the floating-point type T:
//   asin(x) ~ pi/2 - sqrt(1 - x) * (1.5707288 - 0.2121144 x
//                                   + 0.0742610 x^2 - 0.0187293 x^3)
//
// pi/2 is taken in the precision of T. The earlier code subtracted from the
// float constant CE_HALF_PI, which shifted every FastInvSin<double> result by
// the float rounding error of pi/2 (about 4e-8). std::sqrt is used so the
// header does not rely on a global-namespace sqrt being visible.
//
// tValue: input value. The benchmarks only use [0, 1]; values above 1 make the
// square root NaN, and negative values are presumably outside the range this
// polynomial was fitted for -- TODO confirm.
// Returns the approximated arcsine in radians as T.
template<typename T> inline T FastInvSin(const T& tValue)
{
    const T tHalfPi = static_cast<T>(1.5707963267948966192);

    // The root is evaluated in double and then narrowed to T.
    T tRoot = static_cast<T>(std::sqrt(1.0 - tValue));
    T tResult = -static_cast<T>(0.0187293);

    // Horner evaluation of the cubic in x.
    tResult *= tValue;
    tResult += static_cast<T>(0.0742610);
    tResult *= tValue;
    tResult -= static_cast<T>(0.2121144);
    tResult *= tValue;
    tResult += static_cast<T>(1.5707288);
    tResult = tHalfPi - tRoot*tResult;

    return tResult;
}

//some very fast cos and acos functions

// Low-order cosine approximation, generic over the floating-point type T:
//   cos(x) ~ 1 - 4.967e-01*x^2 + 3.705e-02*x^4
//
// An input exactly equal to pi/2 (rounded to T) returns exactly 0. The earlier
// code compared against the float constant CE_HALF_PI, so for T = double the
// shortcut never matched the double value of pi/2.
//
// tAngle: angle in radians. Returns the approximated cosine as T.
template<typename T> inline T FastCos0(const T& tAngle)
{
    // Exact-match shortcut only; nearby values go through the polynomial.
    if(tAngle == static_cast<T>(1.5707963267948966192))
        return static_cast<T>(0);

    T tASqr = tAngle*tAngle;
    T tResult = static_cast<T>(3.705e-02);

    // Horner evaluation in x^2.
    tResult *= tASqr;
    tResult -= static_cast<T>(4.967e-01);
    tResult *= tASqr;
    tResult += static_cast<T>(1.0);

    return tResult;
}

// Higher-order cosine approximation, generic over the floating-point type T:
//   cos(x) ~ 1 - 4.999999963e-01 x^2 + 4.16666418e-02 x^4
//              - 1.3888397e-03 x^6 + 2.47609e-05 x^8 - 2.605e-07 x^10
//
// An input exactly equal to pi/2 (rounded to T) returns exactly 0. The earlier
// code compared against the float constant CE_HALF_PI, so for T = double the
// shortcut never matched the double value of pi/2.
//
// tAngle: angle in radians. Returns the approximated cosine as T.
template<typename T> inline T FastCos1(const T& tAngle)
{
    // Exact-match shortcut only; nearby values go through the polynomial.
    if(tAngle == static_cast<T>(1.5707963267948966192))
        return static_cast<T>(0);

    T tASqr = tAngle*tAngle;
    T tResult = -static_cast<T>(2.605e-07);

    // Horner evaluation in x^2, starting with the x^10 coefficient.
    tResult *= tASqr;
    tResult += static_cast<T>(2.47609e-05);
    tResult *= tASqr;
    tResult -= static_cast<T>(1.3888397e-03);
    tResult *= tASqr;
    tResult += static_cast<T>(4.16666418e-02);
    tResult *= tASqr;
    tResult -= static_cast<T>(4.999999963e-01);
    tResult *= tASqr;
    tResult += static_cast<T>(1.0);

    return tResult;
}

// Arccosine approximation, generic over the floating-point type T:
//   acos(x) ~ sqrt(1 - x) * (1.5707288 - 0.2121144 x
//                            + 0.0742610 x^2 - 0.0187293 x^3)
// tValue: input value (the benchmarks only use [0, 1]; values above 1 make
// the square root NaN).
// Returns the approximated arccosine in radians as T.
template<typename T> inline T FastInvCos(const T& tValue)
{
    // Square root is evaluated in double, then narrowed to T.
    const T tSqrtTerm = sqrt(1.0 - tValue);

    // Horner evaluation of the cubic in x, highest coefficient first.
    T tPoly = static_cast<T>(-0.0187293);
    tPoly = tPoly * tValue;
    tPoly = tPoly + static_cast<T>(0.0742610);
    tPoly = tPoly * tValue;
    tPoly = tPoly - static_cast<T>(0.2121144);
    tPoly = tPoly * tValue;
    tPoly = tPoly + static_cast<T>(1.5707288);

    tPoly = tPoly * tSqrtTerm;
    return tPoly;
}

und die Datei main.cpp:

C-/C++-Quelltext

#include <iostream>
#include <ctime>

#include "trigfunc.h"

using namespace std;

// Benchmarks the float sine variants (FastSin0, FastSin1, FastSinTn, sinf, sin)
// and prints timing and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size; each sweep advances i by 1.0f/lBase
//             while i < CE_HALF_PI.
//
// Timing sweeps start at i = 0 and accumulate the results into fTemp, which is
// printed so the loop has an observable result. The "Achievement" percentage
// is (sinf ticks / variant ticks) * 100 - 100.
void TestFastSinFloat(long lTestCount, long lBase)
{
    float fSin0, fSin1, fSinTn, fSinf, fSin, fSin0Accuracy, fSin1Accuracy, fSinTnAccuracy, fTemp=0.0f;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast Sin<float> (between 0 and pi/2):"<<endl;
    cout<<"-------------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastSin0<float> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += FastSin0<float>(i);
        }

    lEnd   = clock();
    fSin0  = float(lEnd-lStart);

    cout<<scientific<<"Sin with FastSin0(sum): "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: FastSin1<float> ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += FastSin1<float>(i);
        }

    lEnd   = clock();
    fSin1  = float(lEnd-lStart);

    cout<<"Sin with FastSin1: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: FastSinTn<float> ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += FastSinTn<float>(i);
        }

    lEnd   = clock();
    fSinTn = float(lEnd-lStart);

    cout<<"Sin with FastSinTn: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: sinf (reference for the percentages) ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += sinf(i);
        }

    lEnd   = clock();
    fSinf  = float(lEnd-lStart);

    cout<<"Sin with sinf: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: sin ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += sin(i);
        }

    lEnd   = clock();
    fSin  = float(lEnd-lStart);

    cout<<"Sin with sin: "<<fTemp<<" (against unwanted optimizations)"<<endl;
 
    // --- report ---
    // NOTE(review): the "per second" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount, so it does not look like a
    // true evaluations-per-second value -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastSin0: "<<fSin0/CLOCKS_PER_SEC<<" sec or "<<(1/fSin0)*lBase<<" per second and Achievement: "<<(fSinf/fSin0)*100-100<<"%"<<endl;
    cout<<"FastSin1: "<<fSin1/CLOCKS_PER_SEC<<" sec or "<<(1/fSin1)*lBase<<" per second and Achievement: "<<(fSinf/fSin1)*100-100<<"%"<<endl;
    cout<<"FastSinTn: "<<fSinTn/CLOCKS_PER_SEC<<" sec or "<<(1/fSinTn)*lBase<<" per second and Achievement: "<<(fSinf/fSinTn)*100-100<<"%"<<endl;
    cout<<"sinf: "<<fSinf/CLOCKS_PER_SEC<<" sec or "<<(1/fSinf)*lBase<<" per second and Achievement: "<<(fSinf/fSinf)*100-100<<"%"<<endl;
    cout<<"sin: "<<fSin/CLOCKS_PER_SEC<<" sec or "<<(1/fSin)*lBase<<" per second and Achievement: "<<(fSinf/fSin)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |sin(i) - approximation(i)| ---
    // NOTE(review): these sweeps start at i = 1, not 0, so only [1, pi/2) is
    // checked although the heading says "between 0 and pi/2".
    fSin0Accuracy = 0.0f;

    for(float i=1; i<CE_HALF_PI; i=i+1.0f/lBase)
    {
        fTemp = sin(i)-FastSin0<float>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fSin0Accuracy<fTemp)
            fSin0Accuracy = fTemp;
    }

    fSin1Accuracy = 0.0f;

    for(float i=1; i<CE_HALF_PI; i=i+1.0f/lBase)
    {
        fTemp = sin(i)-FastSin1<float>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fSin1Accuracy<fTemp)
            fSin1Accuracy = fTemp;
    }

    fSinTnAccuracy = 0.0f;

    for(float i=1; i<CE_HALF_PI; i=i+1.0f/lBase)
    {
        fTemp = sin(i)-FastSinTn<float>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fSinTnAccuracy<fTemp)
            fSinTnAccuracy = fTemp;
    }

    cout<<scientific<<"Accuracy FastSin0: "<<fSin0Accuracy<<endl;
    cout<<scientific<<"Accuracy FastSin1: "<<fSin1Accuracy<<endl;
    cout<<scientific<<"Accuracy FastSinTn: "<<fSinTnAccuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Benchmarks the double sine variants (FastSin0, FastSin1, FastSinTn, sin) and
// prints timing and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size used while i < CE_DHALF_PI.
//
// The "achieves" percentage is (sin ticks / variant ticks) * 100 - 100.
void TestFastSinDouble(long lTestCount, long lBase)
{
    double dSin0, dSin1, dSinTn, dSin, dSin0Accuracy, dSin1Accuracy, dSinTnAccuracy, dTemp=0.0;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast Sin<double> (between 0 and pi/2): "<<endl;
    cout<<"--------------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastSin0<double> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0; i<CE_DHALF_PI; i=i+1.0/lBase)
        {
            dTemp += FastSin0<double>(i);
        }

    lEnd   = clock();
    dSin0  = double(lEnd-lStart);

    cout<<scientific<<"Sin with FastSin0(sum): "<<dTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: FastSin1<double> ---
    dTemp  = 0.0; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0; i<CE_DHALF_PI; i=i+1.0/lBase)
        {
            dTemp += FastSin1<double>(i);
        }

    lEnd   = clock();
    dSin1  = double(lEnd-lStart);

    cout<<"Sin with FastSin1: "<<dTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: FastSinTn<double> ---
    dTemp  = 0.0; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0; i<CE_DHALF_PI; i=i+1.0/lBase)
        {
            dTemp += FastSinTn<double>(i);
        }

    lEnd   = clock();
    dSinTn = double(lEnd-lStart);

    cout<<"Sin with FastSinTn: "<<dTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: sin (reference for the percentages) ---
    // NOTE(review): this sweep steps by 1.0f/lBase (float division) while the
    // sweeps above use 1.0/lBase, so the step differs slightly in value.
    dTemp  = 0.0; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0; i<CE_DHALF_PI; i=i+1.0f/lBase)
        {
            dTemp += sin(i);
        }

    lEnd   = clock();
    dSin  = double(lEnd-lStart);

    cout<<"Sin with sin: "<<dTemp<<" (against unwanted optimizations)"<<endl;
  
    // --- report ---
    // NOTE(review): the "per sec" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastSin0: "<<dSin0/CLOCKS_PER_SEC<<" sec or "<<(1/dSin0)*lBase<<" per sec and achieves: "<<(dSin/dSin0)*100-100<<"%"<<endl;
    cout<<"FastSin1: "<<dSin1/CLOCKS_PER_SEC<<" sec or "<<(1/dSin1)*lBase<<" per sec and achieves: "<<(dSin/dSin1)*100-100<<"%"<<endl;
    cout<<"FastSinTn: "<<dSinTn/CLOCKS_PER_SEC<<" sec or "<<(1/dSinTn)*lBase<<" per sec and achieves: "<<(dSin/dSinTn)*100-100<<"%"<<endl;
    cout<<"sin: "<<dSin/CLOCKS_PER_SEC<<" sec or "<<(1/dSin)*lBase<<" per sec and achieves: "<<(dSin/dSin)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |sin(i) - approximation(i)| ---
    // NOTE(review): these sweeps start at i = 1, not 0, and step by the
    // float-valued 1.0f/lBase.
    dSin0Accuracy = 0.0f;

    for(double i=1; i<CE_DHALF_PI; i=i+1.0f/lBase)
    {
        dTemp = sin(i)-FastSin0<double>(i);

        if(dTemp<0) dTemp = -dTemp;

        if(dSin0Accuracy<dTemp)
            dSin0Accuracy = dTemp;
    }

    dSin1Accuracy = 0.0f;

    for(double i=1; i<CE_DHALF_PI; i=i+1.0f/lBase)
    {
        dTemp = sin(i)-FastSin1<double>(i);

        if(dTemp<0) dTemp = -dTemp;

        if(dSin1Accuracy<dTemp)
            dSin1Accuracy = dTemp;
    }

    dSinTnAccuracy = 0.0f;

    for(double i=1; i<CE_DHALF_PI; i=i+1.0f/lBase)
    {
        dTemp = sin(i)-FastSinTn<double>(i);

        if(dTemp<0) dTemp = -dTemp;

        if(dSinTnAccuracy<dTemp)
            dSinTnAccuracy = dTemp;
    }

    cout<<scientific<<"Accuracy FastSin0: "<<dSin0Accuracy<<endl;
    cout<<scientific<<"Accuracy FastSin1: "<<dSin1Accuracy<<endl;
    cout<<scientific<<"Accuracy FastSinTn: "<<dSinTnAccuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Benchmarks the float cosine variants (FastCos0, FastCos1, cosf, cos) and
// prints timing and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size; each sweep advances i by 1.0f/lBase
//             while i < CE_HALF_PI.
//
// The "Achievement" percentage is (cosf ticks / variant ticks) * 100 - 100.
void TestFastCosFloat(long lTestCount, long lBase)
{
    float fCos0, fCos1, fCosf, fCos, fCos0Accuracy, fCos1Accuracy, fTemp=0.0f;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast Cos<float> (between 0 and pi/2): "<<endl;
    cout<<"-------------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastCos0<float> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += FastCos0<float>(i);
        }

    lEnd   = clock();
    fCos0  = float(lEnd-lStart);

    cout<<scientific<<"Cos with FastCos0(sum): "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: FastCos1<float> ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += FastCos1<float>(i);
        }

    lEnd   = clock();
    fCos1  = float(lEnd-lStart);

    cout<<"Cos with FastCos1: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: cosf (reference for the percentages) ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += cosf(i);
        }

    lEnd   = clock();
    fCosf  = float(lEnd-lStart);

    cout<<"Cos with cosf: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: cos ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0; i<CE_HALF_PI; i=i+1.0f/lBase)
        {
            fTemp += cos(i);
        }

    lEnd   = clock();
    fCos  = float(lEnd-lStart);

    cout<<"Cos with cos: "<<fTemp<<" (against unwanted optimizations)"<<endl;
 
    // --- report ---
    // NOTE(review): the "per second" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastCos0: "<<fCos0/CLOCKS_PER_SEC<<" sec or "<<(1/fCos0)*lBase<<" per second and Achievement: "<<(fCosf/fCos0)*100-100<<"%"<<endl;
    cout<<"FastCos1: "<<fCos1/CLOCKS_PER_SEC<<" sec or "<<(1/fCos1)*lBase<<" per second and Achievement: "<<(fCosf/fCos1)*100-100<<"%"<<endl;
    cout<<"cosf: "<<fCosf/CLOCKS_PER_SEC<<" sec or "<<(1/fCosf)*lBase<<" per second and Achievement: "<<(fCosf/fCosf)*100-100<<"%"<<endl;
    cout<<"cos: "<<fCos/CLOCKS_PER_SEC<<" sec or "<<(1/fCos)*lBase<<" per second and Achievement: "<<(fCosf/fCos)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |cos(i) - approximation(i)| ---
    // NOTE(review): these sweeps start at i = 1, not 0, so only [1, pi/2) is
    // checked although the heading says "between 0 and pi/2".
    fCos0Accuracy = 0.0f;

    for(float i=1; i<CE_HALF_PI; i=i+1.0f/lBase)
    {
        fTemp = cos(i)-FastCos0<float>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fCos0Accuracy<fTemp)
            fCos0Accuracy = fTemp;
    }

    fCos1Accuracy = 0.0f;

    for(float i=1; i<CE_HALF_PI; i=i+1.0f/lBase)
    {
        fTemp = cos(i)-FastCos1<float>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fCos1Accuracy<fTemp)
            fCos1Accuracy = fTemp;
    }

    cout<<scientific<<"Accuracy FastCos0: "<<fCos0Accuracy<<endl;
    cout<<scientific<<"Accuracy FastCos1: "<<fCos1Accuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Benchmarks the double cosine variants (FastCos0, FastCos1, cos) and prints
// timing and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size used while i < CE_DHALF_PI.
//
// The "achieves" percentage is (cos ticks / variant ticks) * 100 - 100.
void TestFastCosDouble(long lTestCount, long lBase)
{
    double dCos0, dCos1, dCos, dCos0Accuracy, dCos1Accuracy, dTemp=0.0;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast Cos<double> (between 0 and pi/2):"<<endl;
    cout<<"--------------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastCos0<double> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0; i<CE_DHALF_PI; i=i+1.0/lBase)
        {
            dTemp += FastCos0<double>(i);
        }

    lEnd   = clock();
    dCos0  = double(lEnd-lStart);

    cout<<scientific<<"Cos with FastCos0(sum): "<<dTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: FastCos1<double> ---
    dTemp  = 0.0; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0; i<CE_DHALF_PI; i=i+1.0/lBase)
        {
            dTemp += FastCos1<double>(i);
        }

    lEnd   = clock();
    dCos1  = double(lEnd-lStart);

    cout<<"Cos with FastCos1: "<<dTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: cos (reference for the percentages) ---
    // NOTE(review): this sweep steps by 1.0f/lBase (float division) while the
    // sweeps above use 1.0/lBase, so the step differs slightly in value.
    dTemp  = 0.0; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0; i<CE_DHALF_PI; i=i+1.0f/lBase)
        {
            dTemp += cos(i);
        }

    lEnd   = clock();
    dCos  = double(lEnd-lStart);

    cout<<"Cos with cos: "<<dTemp<<" (against unwanted optimizations)"<<endl;
  
    // --- report ---
    // NOTE(review): the "per sec" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastCos0: "<<dCos0/CLOCKS_PER_SEC<<" sec or "<<(1/dCos0)*lBase<<" per sec and achieves: "<<(dCos/dCos0)*100-100<<"%"<<endl;
    cout<<"FastCos1: "<<dCos1/CLOCKS_PER_SEC<<" sec or "<<(1/dCos1)*lBase<<" per sec and achieves: "<<(dCos/dCos1)*100-100<<"%"<<endl;
    cout<<"cos: "<<dCos/CLOCKS_PER_SEC<<" sec or "<<(1/dCos)*lBase<<" per sec and achieves: "<<(dCos/dCos)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |cos(i) - approximation(i)| ---
    // NOTE(review): these sweeps start at i = 1, not 0, and step by the
    // float-valued 1.0f/lBase.
    dCos0Accuracy = 0.0f;

    for(double i=1; i<CE_DHALF_PI; i=i+1.0f/lBase)
    {
        dTemp = cos(i)-FastCos0<double>(i);

        if(dTemp<0) dTemp = -dTemp;

        if(dCos0Accuracy<dTemp)
            dCos0Accuracy = dTemp;
    }

    dCos1Accuracy = 0.0f;

    for(double i=1; i<CE_DHALF_PI; i=i+1.0f/lBase)
    {
        dTemp = cos(i)-FastCos1<double>(i);

        if(dTemp<0) dTemp = -dTemp;

        if(dCos1Accuracy<dTemp)
            dCos1Accuracy = dTemp;
    }

    cout<<scientific<<"Accuracy FastCos0: "<<dCos0Accuracy<<endl;
    cout<<scientific<<"Accuracy FastCos1: "<<dCos1Accuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Benchmarks the float arcsine variants (FastInvSin, asinf, asin) and prints
// timing and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size; each sweep advances i by 1.0f/lBase
//             while i <= 1.0f.
//
// The "Achievement" percentage is (asinf ticks / variant ticks) * 100 - 100.
void TestFastInvSinFloat(long lTestCount, long lBase)
{
    float fInvSin,  fAsinf, fAsin, fInvSinAccuracy, fTemp = 0.0f;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast Asin<float> (between 0 and 1):"<<endl;
    cout<<"-----------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastInvSin<float> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += FastInvSin<float>(i);
        }

    lEnd    = clock();
    fInvSin = float(lEnd-lStart);

    cout<<scientific<<"asin with FastInvSin(sum): "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: asinf (reference for the percentages) ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += asinf(i);
        }

    lEnd   = clock();
    fAsinf = float(lEnd-lStart);

    cout<<"asin with asinf: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: asin ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += asin(i);
        }

    lEnd   = clock();
    fAsin  = float(lEnd-lStart);

    cout<<"asin with asin: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- report ---
    // NOTE(review): the "per second" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastInvSin: "<<fInvSin/CLOCKS_PER_SEC<<" sec or "<<(1/fInvSin)*lBase<<" per second and Achievement: "<<(fAsinf/fInvSin)*100-100<<"%"<<endl;
    cout<<"asinf: "<<fAsinf/CLOCKS_PER_SEC<<" sec or "<<(1/fAsinf)*lBase<<" per second and Achievement: "<<(fAsinf/fAsinf)*100-100<<"%"<<endl;
    cout<<"asin: "<<fAsin/CLOCKS_PER_SEC<<" sec or "<<(1/fAsin)*lBase<<" per second and Achievement: "<<(fAsinf/fAsin)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |asin(i) - FastInvSin(i)| over the same sweep ---
    fInvSinAccuracy = 0.0f;

    for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
    {
        fTemp = asin(i)-FastInvSin<float>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fInvSinAccuracy<fTemp)
            fInvSinAccuracy = fTemp;
    }

    cout<<scientific<<"Accuracy FastInvSin: "<<fInvSinAccuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Benchmarks the double arcsine variants (FastInvSin, asin) and prints timing
// and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size; each sweep advances i by 1.0f/lBase
//             (a float-valued step) while i <= 1.0f.
//
// The "Achievement" percentage is (asin ticks / variant ticks) * 100 - 100.
void TestFastInvSinDouble(long lTestCount, long lBase)
{
    // Note: despite the "f" prefix these locals are doubles.
    double fInvSin, fAsin, fInvSinAccuracy, fTemp = 0.0f;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast Asin<double> (between 0 and 1):"<<endl;
    cout<<"------------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastInvSin<double> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += FastInvSin<double>(i);
        }

    lEnd    = clock();
    fInvSin = double(lEnd-lStart);

    cout<<scientific<<"asin with FastInvSin(sum): "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: asin (reference for the percentages) ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += asin(i);
        }

    lEnd   = clock();
    fAsin  = double(lEnd-lStart);

    cout<<"asin with asin: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- report ---
    // NOTE(review): the "per second" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastInvSin: "<<fInvSin/CLOCKS_PER_SEC<<" sec or "<<(1/fInvSin)*lBase<<" per second and Achievement: "<<(fAsin/fInvSin)*100-100<<"%"<<endl;
    cout<<"asin: "<<fAsin/CLOCKS_PER_SEC<<" sec or "<<(1/fAsin)*lBase<<" per second and Achievement: "<<(fAsin/fAsin)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |asin(i) - FastInvSin(i)| over the same sweep ---
    fInvSinAccuracy = 0.0f;

    for(double i=0.0f; i<=1.0f; i=i+1.0f/lBase)
    {
        fTemp = asin(i)-FastInvSin<double>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fInvSinAccuracy<fTemp)
            fInvSinAccuracy = fTemp;
    }

    cout<<scientific<<"Accuracy FastInvSin: "<<fInvSinAccuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Benchmarks the float arccosine variants (FastInvCos, acosf, acos) and prints
// timing and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size; each sweep advances i by 1.0f/lBase
//             while i <= 1.0f.
//
// The "Achievement" percentage is (acosf ticks / variant ticks) * 100 - 100.
void TestFastInvCosFloat(long lTestCount, long lBase)
{
    float fInvCos,  fAcosf, fAcos, fInvCosAccuracy, fTemp = 0.0f;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast acos<float> (between 0 and 1):"<<endl;
    cout<<"-----------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastInvCos<float> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += FastInvCos<float>(i);
        }

    lEnd    = clock();
    fInvCos = float(lEnd-lStart);

    cout<<scientific<<"acos with FastInvCos(sum): "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: acosf (reference for the percentages) ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += acosf(i);
        }

    lEnd   = clock();
    fAcosf = float(lEnd-lStart);

    cout<<"acos with acosf: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: acos ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += acos(i);
        }

    lEnd   = clock();
    fAcos  = float(lEnd-lStart);

    cout<<"acos with acos: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- report ---
    // NOTE(review): the "per second" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastInvCos: "<<fInvCos/CLOCKS_PER_SEC<<" sec or "<<(1/fInvCos)*lBase<<" per second and Achievement: "<<(fAcosf/fInvCos)*100-100<<"%"<<endl;
    cout<<"acosf: "<<fAcosf/CLOCKS_PER_SEC<<" sec or "<<(1/fAcosf)*lBase<<" per second and Achievement: "<<(fAcosf/fAcosf)*100-100<<"%"<<endl;
    cout<<"acos: "<<fAcos/CLOCKS_PER_SEC<<" sec or "<<(1/fAcos)*lBase<<" per second and Achievement: "<<(fAcosf/fAcos)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |acos(i) - FastInvCos(i)| over the same sweep ---
    fInvCosAccuracy = 0.0f;

    for(float i=0.0f; i<=1.0f; i=i+1.0f/lBase)
    {
        fTemp = acos(i)-FastInvCos<float>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fInvCosAccuracy<fTemp)
            fInvCosAccuracy = fTemp;
    }

    cout<<scientific<<"Accuracy FastInvCos: "<<fInvCosAccuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Benchmarks the double arccosine variants (FastInvCos, acos) and prints
// timing and accuracy figures to cout.
//
// lTestCount: number of times each timed sweep is repeated.
// lBase:      reciprocal of the step size; each sweep advances i by 1.0f/lBase
//             (a float-valued step) while i <= 1.0f.
//
// The "Achievement" percentage is (acos ticks / variant ticks) * 100 - 100.
void TestFastInvCosDouble(long lTestCount, long lBase)
{
    // Note: despite the "f" prefix these locals are doubles.
    double fInvCos, fAcos, fInvCosAccuracy, fTemp = 0.0f;
    // NOTE(review): clock() returns clock_t; the values are stored in long here.
    long lStart, lEnd;

    cout<<"fast acos<double> (between 0 and 1):"<<endl;
    cout<<"------------------------------------"<<endl;
    cout<<endl;
    
    // --- timing: FastInvCos<double> ---
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += FastInvCos<double>(i);
        }

    lEnd    = clock();
    fInvCos = double(lEnd-lStart);

    cout<<scientific<<"acos with FastInvCos(sum): "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- timing: acos (reference for the percentages) ---
    fTemp  = 0.0f; 
    lStart = clock();

    for(long j=0; j<lTestCount; j++)
        for(double i=0.0f; i<=1.0f; i=i+1.0f/lBase)
        {
            fTemp += acos(i);
        }

    lEnd   = clock();
    fAcos  = double(lEnd-lStart);

    cout<<"acos with acos: "<<fTemp<<" (against unwanted optimizations)"<<endl;

    // --- report ---
    // NOTE(review): the "per second" figure is (1/ticks)*lBase; it is neither
    // scaled by CLOCKS_PER_SEC nor by lTestCount -- confirm the intended formula.
    // NOTE(review): a tick count of 0 makes the divisions below divide by zero.
    cout<<fixed<<endl;
    cout<<"FastInvCos: "<<fInvCos/CLOCKS_PER_SEC<<" sec or "<<(1/fInvCos)*lBase<<" per second and Achievement: "<<(fAcos/fInvCos)*100-100<<"%"<<endl;
    cout<<"acos: "<<fAcos/CLOCKS_PER_SEC<<" sec or "<<(1/fAcos)*lBase<<" per second and Achievement: "<<(fAcos/fAcos)*100-100<<"%"<<endl;
    cout<<endl;

    // --- accuracy: largest |acos(i) - FastInvCos(i)| over the same sweep ---
    fInvCosAccuracy = 0.0f;

    for(double i=0.0f; i<=1.0f; i=i+1.0f/lBase)
    {
        fTemp = acos(i)-FastInvCos<double>(i);

        if(fTemp<0) fTemp = -fTemp;

        if(fInvCosAccuracy<fTemp)
            fInvCosAccuracy = fTemp;
    }

    cout<<scientific<<"Accuracy FastInvCos: "<<fInvCosAccuracy<<endl;
    // Switch the stream back to fixed notation for later output.
    cout<<fixed<<endl;
}

// Runs every benchmark in sequence with the same settings.
int main(void)
{
    // Passed as doubles, exactly like the former 1e3 / 1e4 literals, and
    // converted implicitly to each function's parameter types.
    const double kTestCount = 1e3;  // first argument of every benchmark
    const double kBase      = 1e4;  // second argument of every benchmark

    // sine approximations
    TestFastSinFloat(kTestCount, kBase);
    TestFastSinDouble(kTestCount, kBase);

    // cosine approximations
    TestFastCosFloat(kTestCount, kBase);
    TestFastCosDouble(kTestCount, kBase);

    // arcsine approximations
    TestFastInvSinFloat(kTestCount, kBase);
    TestFastInvSinDouble(kTestCount, kBase);

    // arccosine approximations
    TestFastInvCosFloat(kTestCount, kBase);
    TestFastInvCosDouble(kTestCount, kBase);

    return 0;
}

mit folgendem Ergebnis:

Zitat

fast Sin<float> (between 0 and pi/2):
-------------------------------------

Sin with FastSin0(sum): 9.999758e+006 (against unwanted optimizations)
Sin with FastSin1: 9.999537e+006 (against unwanted optimizations)
Sin with FastSinTn: 9.999541e+006 (against unwanted optimizations)
Sin with sinf: 9.999537e+006 (against unwanted optimizations)
Sin with sin: 9.999537e+006 (against unwanted optimizations)

FastSin0: 0.093000 sec or 107.526878 per second and Achievement: 673.118286%
FastSin1: 0.172000 sec or 58.139534 per second and Achievement: 318.023254%
FastSinTn: 0.235000 sec or 42.553192 per second and Achievement: 205.957443%
sinf: 0.719000 sec or 13.908206 per second and Achievement: 0.000000%
sin: 0.734000 sec or 13.623979 per second and Achievement: -2.043597%

Accuracy FastSin0: 1.641530e-004
Accuracy FastSin1: 1.884717e-008
Accuracy FastSinTn: 3.525043e-006

fast Sin<double> (between 0 and pi/2):
--------------------------------------

Sin with FastSin0(sum): 9.999758e+006 (against unwanted optimizations)
Sin with FastSin1: 9.999537e+006 (against unwanted optimizations)
Sin with FastSinTn: 9.999541e+006 (against unwanted optimizations)
Sin with sin: 9.999537e+006 (against unwanted optimizations)

FastSin0: 0.094000 sec or 106.382979 per sec and achieves: 663.829787%
FastSin1: 0.172000 sec or 58.139535 per sec and achieves: 317.441860%
FastSinTn: 0.187000 sec or 53.475936 per sec and achieves: 283.957219%
sin: 0.718000 sec or 13.927577 per sec and achieves: 0.000000%

Accuracy FastSin0: 1.641486e-004
Accuracy FastSin1: 1.884715e-008
Accuracy FastSinTn: 3.540202e-006

fast Cos<float> (between 0 and pi/2):
-------------------------------------

Cos with FastCos0(sum): 1.000008e+007 (against unwanted optimizations)
Cos with FastCos1: 1.018673e+007 (against unwanted optimizations)
Cos with cosf: 1.000050e+007 (against unwanted optimizations)
Cos with cos: 1.000050e+007 (against unwanted optimizations)

FastCos0: 0.109000 sec or 91.743118 per second and Achievement: 588.073364%
FastCos1: 0.234000 sec or 42.735043 per second and Achievement: 220.512817%
cosf: 0.750000 sec or 13.333333 per second and Achievement: 0.000000%
cos: 0.750000 sec or 13.333333 per second and Achievement: 0.000000%

Accuracy FastCos0: 1.187912e-003
Accuracy FastCos1: 6.481527e-009

fast Cos<double> (between 0 and pi/2):
--------------------------------------

Cos with FastCos0(sum): 1.000008e+007 (against unwanted optimizations)
Cos with FastCos1: 1.000050e+007 (against unwanted optimizations)
Cos with cos: 1.000050e+007 (against unwanted optimizations)

FastCos0: 0.125000 sec or 80.000000 per sec and achieves: 500.000000%
FastCos1: 0.218000 sec or 45.871560 per sec and achieves: 244.036697%
cos: 0.750000 sec or 13.333333 per sec and achieves: 0.000000%

Accuracy FastCos0: 1.187937e-003
Accuracy FastCos1: 2.308052e-009

fast Asin<float> (between 0 and 1):
-----------------------------------

asin with FastInvSin(sum): 5.708732e+006 (against unwanted optimizations)
asin with asinf: 5.670888e+006 (against unwanted optimizations)
asin with asin: 5.670888e+006 (against unwanted optimizations)

FastInvSin: 0.156000 sec or 64.102562 per second and Achievement: 340.384613%
asinf: 0.687000 sec or 14.556041 per second and Achievement: 0.000000%
asin: 0.672000 sec or 14.880953 per second and Achievement: 2.232143%

Accuracy FastInvSin: 6.759167e-005

fast Asin<double> (between 0 and 1):
------------------------------------

asin with FastInvSin(sum): 5.708732e+006 (against unwanted optimizations)
asin with asin: 5.708752e+006 (against unwanted optimizations)

FastInvSin: 0.156000 sec or 64.102564 per second and Achievement: 330.769231%
asin: 0.672000 sec or 14.880952 per second and Achievement: 0.000000%

Accuracy FastInvSin: 6.757051e-005

fast acos<float> (between 0 and 1):
-----------------------------------

acos with FastInvCos(sum): 1.000080e+007 (against unwanted optimizations)
acos with acosf: 9.948369e+006 (against unwanted optimizations)
acos with acos: 9.948369e+006 (against unwanted optimizations)

FastInvCos: 0.156000 sec or 64.102562 per second and Achievement: 360.897430%
acosf: 0.719000 sec or 13.908206 per second and Achievement: 0.000000%
acos: 0.688000 sec or 14.534883 per second and Achievement: 4.505814%

Accuracy FastInvCos: 6.754795e-005

fast acos<double> (between 0 and 1):
------------------------------------

acos with FastInvCos(sum): 1.000080e+007 (against unwanted optimizations)
acos with acos: 1.000078e+007 (against unwanted optimizations)

FastInvCos: 0.156000 sec or 64.102564 per second and Achievement: 341.025641%
acos: 0.688000 sec or 14.534884 per second and Achievement: 0.000000%

Accuracy FastInvCos: 6.752679e-005

Drücken Sie eine beliebige Taste . . .

Das kann sich doch schon sehen lassen, oder? Fällt jemandem noch eine Optimierung ein?

//edit:

FastSin0: aus D. H. Eberly 3D Game Engine Architecture theor. Fehler: 1,7*10^-4 erw. Gewinn: 4.0
FastSin1: aus D. H. Eberly 3D Game Engine Architecture theor. Fehler: 1,9*10^-8 erw. Gewinn: 2.8
FastSinTn: Taylorentwicklung von D. Scherfgen(s. Thread) theor. Fehler: ? erw. Gewinn: ?
FastCos0: aus D. H. Eberly 3D Game Engine Architecture theor. Fehler: 1,2*10^-3 erw. Gewinn: 4.5
FastCos1: aus D. H. Eberly 3D Game Engine Architecture theor. Fehler: 6,5*10^-9 erw. Gewinn: 2.8
FastInvSin: aus D. H. Eberly 3D Game Engine Architecture theor. Fehler: 6,8*10^-5 erw. Gewinn: 7.5
FastInvCos: aus D. H. Eberly 3D Game Engine Architecture theor. Fehler: 1,3*10^-7 erw. Gewinn: 5.8

Was es alles gibt, das ich nicht brauche - Aristoteles

Zum Seitenanfang

Werbeanzeige

spieleprogrammierer.de - Forum und Wiki zur Spieleprogrammierung und Spieleentwicklung