#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#ifdef _MSC_VER
#include <windows.h>
#include <process.h>
#elif defined(__GNUC__)
#include <pthread.h>
#else
#error
#endif
#ifndef BLOCK
# define BLOCK 100
#endif
#ifndef SiZe
# define SiZe 1300
#endif
#ifndef THREADS
#define THREADS 2
#endif
typedef double t;
typedef long long f_type;
struct thread_info {
int n, t_id, first, last, block;
t (*matr)[SiZe][SiZe + 1];
volatile f_type *flags;
};
int barrier(int number_of_threads, volatile f_type *flags, int id) {
int32_t buf;
f_type mask_f;
mask_f = ((f_type) 1 << (63 - id));
#ifdef _MSC_VER
__asm {
mov eax, 1
mov ebx, flags
lock xadd dword ptr [ebx], eax
mov buf, eax
}
#elif defined(__GNUC__)
buf = 1;
asm volatile ("lock xadd %0, %1"
: "=r"(buf), "=m"(*flags)
: "0"(buf)
);
#endif
if (buf == number_of_threads - 1) {
(*flags) = mask_f;
return 1;
} else {
while (((*flags) & mask_f) == 0) {}
return 0;
}
}
#ifdef _MSC_VER
unsigned __stdcall
#elif defined(__GNUC__)
void *
#endif
thread(void *arg) {
int i, j, k, l, k1 = 0, k2 = 0, n = SiZe, block = BLOCK, f1, f2, buf, th_n;
struct thread_info *target = (struct thread_info *) arg;
struct thread_info *targ;
targ = target - target->t_id;
n = targ->n;
while (k2 < n){
if (k1 + block < n){ k2 += block;} else { k2 = n; }
buf = barrier(THREADS, targ->flags, 0);
if (buf == 1) {
for (k = k1; k < k2; k++){
for (j = k + 1; j <= n; j++) { (*(*targ).matr)[k][j] /= (*(*targ).matr)[k][k];}
for (i = k + 1; i < k2; i++) {
for (j = k + 1; j <= n; j++) {
(*(*targ).matr)[i][j] -= (*(*targ).matr)[k][j] * (*(*targ).matr)[i][k];
}
}
}
f1 = k2;
f2 = k2;
l = 0;
th_n = (n - k2 + block - 1) / block;
th_n = (th_n + THREADS - 1) / THREADS;
while (l < THREADS){
if (f1 + block * th_n < n){ f2 += block * th_n; } else { f2 = n; }
targ[l].first = f1;
targ[l].last = f2;
l++;
f1 += block * th_n;
}
}
barrier(THREADS, targ->flags, 1);
f1 = targ[target->t_id].first;
f2 = targ[target->t_id].first;
while (f2 < targ[target->t_id].last){
if (f1 + block < targ[target->t_id].last){ f2 += block; } else { f2 = targ[target->t_id].last; }
for (k = k1; k < k2; k++){
for (i = f1; i < f2; i++) {
for (j = k + 1; j <= n; j++) {
(*(*targ).matr)[i][j] -= (*(*targ).matr)[k][j] * (*(*targ).matr)[i][k];
}
}
}
f1 += block;
}
k1 += block;
}
return NULL;
}
void gauss1(int n, t (*matr)[SiZe][SiZe + 1]){
int i, block = BLOCK;
#ifdef _MSC_VER
HANDLE threads[THREADS];
#elif defined(__GNUC__)
pthread_t threads[THREADS];
#endif
struct thread_info *thread_arg = (struct thread_info *)malloc(THREADS * sizeof(struct thread_info));// ìàññèâ èíôû î ïîòîêàõ
volatile f_type *flags = (volatile f_type *)malloc(sizeof(f_type));
*flags = 0;
for (i = 0; i < THREADS; i++) {
thread_arg[i].t_id = i;
thread_arg[i].n = n;
thread_arg[i].matr = matr;
thread_arg[i].flags = flags;
thread_arg[i].block = block;
#ifdef _MSC_VER
threads[i] = (HANDLE)_beginthreadex(NULL, 0, &thread, &thread_arg[i], 0, NULL);Sleep(0);
#elif defined(__GNUC__)
pthread_create(&threads[i], NULL, &thread, &thread_arg[i]);
#endif
}
#ifdef _MSC_VER
WaitForMultipleObjects(THREADS - 1, threads, TRUE, INFINITE );
#endif
for (i = 1; i < THREADS; i++)
#ifdef _MSC_VER
CloseHandle( threads[i] );
#elif defined(__GNUC__)
pthread_join(threads[i], NULL);
#endif
free(thread_arg);
}
void gauss2(int n, t (*matr)[SiZe][SiZe + 1]){
int i, k;
for ( k = n - 1; k > 0; k--){
for ( i = k - 1; i >= 0; i--){
(*matr)[i][n] -= (*matr)[k][n] * (*matr)[i][k];
}
}
}
int main(){
const int n = SiZe;
int j, k;
t T, err;
t (*matr)[n][n + 1] = (t (*)[n][n + 1])malloc(n * (n + 1) * sizeof(t));
t *x = (t*)malloc(n*sizeof(t));
srand( 1 );
for (j = 0; j < n; j++){
for (k = 0; k <= n; k++){
(*matr)[j][k] = rand() % 1000 + 1.0;
}
}
gauss1(n, matr);
gauss2(n, matr);
printf("n = %d, BLOCK = %d, THREADS = %d; \n", n, BLOCK, THREADS );
for ( j=0; j<n; j++ ) x[j] = (*matr)[j][n];
srand( 1 );
for (j = 0; j < n; j++){
for (k = 0; k <= n; k++){
(*matr)[j][k] = rand() % 1000 + 1.0;
}
}
err = 0.0;
for (j = 0; j < n; j++){
T = -(*matr)[j][n];
for (k = 0; k < n; k++) T += (*matr)[j][k] * x[k];
err += T*T;
}
err = err/n;
printf("err = %g\n", err);
free( x );
free((void*)matr);
return 0;
}