Thread example dividing up a range

Another Approach

To run the program, you can supply two command line options. The first is the number of threads to create and the second is the number of iterations (times 1,000,000), e.g.:
./pi-atan 8 4000
This will create 8 threads, giving each thread 500,000,000 iterations (4 billion in total). The results and timings are shown at the bottom of this page. You should run this program on any computers that you have access to; the more cores you have, the more interesting the results are likely to be.

pi-atan.c:

/* pi-atan.c  uses long long, so no -ansi, compile with -O2          */
/* gcc -Wall -Wextra -pedantic -O2 pi-atan.c -pthread -lm -o pi-atan */

#include <stdio.h>   /* printf                  */
#include <stdlib.h>  /* exit, atoi              */
#include <pthread.h> /* thread create/join/exit */
#include <unistd.h>  /* getpid                  */
#include <math.h>    /* sqrt                    */
#include <string.h>  /* strerror                */

/* Thread data: one instance per thread. main() fills in the inputs and   */
/* passes the address to pthread_create; the thread entry point writes    */
/* `result`, which main() reads back after joining the thread.            */
struct pi_data
{
  long long start; /* First rectangle index handled by this thread        */
  long long end;   /* One past the last index (atan_pi2 loops i < end)    */
  long long total; /* Total number of iterations across all threads       */
  double result;   /* Output: this thread's partial sum of the pi approx. */
  double (*threadFn)(long long, long long, long long); /* PI function, called as (start, end, total) */
};

/*
 * Midpoint-rule partial sum for the integral of 4 / (1 + x^2) over [0, 1].
 *
 * The unit interval is cut into `total` rectangles of equal width; this
 * call adds up the areas of the rectangles with indices start .. end - 1.
 * Summing the results over slices that together cover 0 .. total - 1
 * gives an approximation of pi.
 *
 * Returns 0.0 when the slice is empty (start >= end).
 */
double atan_pi2(long long start, long long end, long long total)
{
  const double width = 1.0 / total;
  const double half_width = width / 2;
  double sum = 0.0;
  long long k = start;

  while (k < end)
  {
      /* x coordinate at the centre of rectangle k */
    double x = (k * width) + half_width;
    double height = 4.0 / (1 + x * x);
    double slice = height * width;

    sum += slice;
    k++;
  }

  return sum;
}

/*
 * Thread entry point handed to pthread_create.
 *
 * p points at a struct pi_data. The function stored in its threadFn
 * member is called with (start, end, total) and the value it returns is
 * written to the structure's result member. Always returns NULL.
 */
void *threadFn(void *p)
{
  struct pi_data *job = p;
  double partial;

#ifdef DEBUG
  printf("pid: %i, tid: %u\n", getpid(), (unsigned)pthread_self());
  printf("start: %lli, end: %lli, total: %lli\n", job->start, job->end, job->total);
  /*sleep(3);*/
#endif

    /* The answer travels back to the caller through the structure */
  partial = job->threadFn(job->start, job->end, job->total);
  job->result = partial;

  return NULL;
}

/*
 * Approximates pi by splitting a midpoint-rule integration across threads.
 *
 * argv[1] (optional): number of threads, must be >= 1 (default 1).
 * argv[2] (optional): number of iterations in millions, must be >= 0
 *                     (default 100, i.e. 100,000,000 iterations).
 *
 * Returns 0 on success, or EXIT_FAILURE on bad arguments, on allocation
 * failure, or when a thread could not be created.
 */
int main(int argc, char **argv)
{
  int i;
  int count = 1;        /* Number of threads, default is 1     */
  int created = 0;      /* Threads actually started            */
  int status = 0;       /* Exit status of the program          */
  long long start = 0;  /* Start of the next thread's range    */
  long long range;      /* Base size of each thread's range    */
  long long extra;      /* Leftover iterations to hand out     */
  double result = 0;    /* Result of calculation (PI)          */
  pthread_t *threads;   /* The array of threads                */
  struct pi_data *data; /* The array of params to threadFn     */
  long long iterations; /* Number of total iterations          */

    /* Default number of iterations */
  iterations = 1000LL * 1000LL * 100LL;

    /* If user provided the number of threads to run */
  if (argc > 1)
    count = atoi(argv[1]);

    /* Zero or negative would divide by zero below; atoi also yields 0 */
    /* for non-numeric input.                                          */
  if (count < 1)
  {
    fprintf(stderr, "The number of threads must be at least 1.\n");
    return EXIT_FAILURE;
  }

    /* If the user provided the number of iterations */
  if (argc > 2)
  {
    int millions = atoi(argv[2]);

    if (millions < 0)
    {
      fprintf(stderr, "The number of iterations must not be negative.\n");
      return EXIT_FAILURE;
    }
    iterations = 1000LL * 1000LL * millions;
  }

    /* The size (range) of each thread's data. The division may leave a */
    /* remainder; those iterations are given, one each, to the first    */
    /* `extra` threads so that every rectangle is counted.              */
  range = iterations / count;
  extra = iterations % count;

    /* Threads and data structures for Pthread API */
  threads = malloc((size_t)count * sizeof *threads);
  data = malloc((size_t)count * sizeof *data);
  if (threads == NULL || data == NULL)
  {
    perror("malloc");
    free(threads);
    free(data);
    return EXIT_FAILURE;
  }

    /* For each thread */
  for (i = 0; i < count; i++)
  {
    int rc;
    long long share = range + (i < extra ? 1 : 0);

    data[i].start = start;
    data[i].end = start + share;
    data[i].total = iterations;
    data[i].result = 0.0;
    data[i].threadFn = atan_pi2;

      /* pthread_create reports failure through its return value (an */
      /* error number), not through -1/errno.                        */
    rc = pthread_create(&threads[i], NULL, threadFn, &data[i]);
    if (rc != 0)
    {
      fprintf(stderr, "Thread #%i failed. pthread_create: %s\n", i, strerror(rc));
      status = EXIT_FAILURE;
      break;
    }
    created++;
    start += share;
  }

    /* Wait on the threads that were started, order is unimportant */
  for (i = 0; i < created; i++)
  {
    pthread_join(threads[i], NULL);
    result += data[i].result;
  }

    /* Show results only if every part of the range was computed */
  if (status == 0)
  {
    printf("   Threads: %2i\n", count);
    printf("Iterations: %10lli\n", iterations);
    printf("      atan: %14.12f\n", result);
  }

    /* Clean up */
  free(threads);
  free(data);

  return status;
}
Output: (Using Maya)
1 Thread | 2 Threads
   Threads:  1
Iterations: 4000000000
      atan: 3.141592653590

real  0m14.377s
user  0m14.357s
sys    0m0.008s
   Threads:  2
Iterations: 4000000000
      atan: 3.141592653590

real  0m7.253s
user  0m14.476s
sys    0m0.008s
4 Threads | 8 Threads
   Threads:  4
Iterations: 4000000000
      atan: 3.141592653590

real  0m3.788s
user  0m15.108s
sys    0m0.008s
   Threads:  8
Iterations: 4000000000
      atan: 3.141592653590

real  0m3.785s
user  0m29.897s
sys    0m0.019s


1 Thread | 2 Threads | 3 Threads | 4 Threads
   Threads:  1
Iterations: 4000000000
      atan: 3.141592653590

real  0m14.377s
user  0m14.357s
sys    0m0.008s
   Threads:  2
Iterations: 4000000000
      atan: 3.141592653590

real  0m7.253s
user  0m14.476s
sys    0m0.008s
   Threads:  3
Iterations: 4000000000
      atan: 3.141592653090

real  0m4.933s
user  0m14.760s
sys    0m0.007s
   Threads:  4
Iterations: 4000000000
      atan: 3.141592653590

real  0m3.788s
user  0m15.108s
sys    0m0.008s
5 Threads | 6 Threads | 7 Threads | 8 Threads
   Threads:  5
Iterations: 4000000000
      atan: 3.141592653590

real  0m4.567s
user  0m18.176s
sys    0m0.010s
   Threads:  6
Iterations: 4000000000
      atan: 3.141592651590

real  0m4.240s
user  0m22.706s
sys    0m0.016s
   Threads:  7
Iterations: 4000000000
      atan: 3.141592652090

real  0m4.292s
user  0m26.174s
sys    0m0.017s
   Threads:  8
Iterations: 4000000000
      atan: 3.141592653590

real  0m3.784s
user  0m29.924s
sys    0m0.020s
9 Threads | 10 Threads | 11 Threads | 12 Threads
   Threads:  9
Iterations: 4000000000
      atan: 3.141592651590

real  0m4.023s
user  0m29.745s
sys    0m0.020s
   Threads: 10
Iterations: 4000000000
      atan: 3.141592653590

real  0m3.854s
user  0m29.755s
sys    0m0.020s
   Threads: 11
Iterations: 4000000000
      atan: 3.141592650090

real  0m3.869s
user  0m29.786s
sys    0m0.020s
   Threads: 12
Iterations: 4000000000
      atan: 3.141592651590

real  0m3.802s
user  0m29.766s
sys    0m0.020s