POSIX = Portable Operating System Interface [for Unix]
by Andrea Mantoni
Ultima modifica: %%mtime(%d %B %Y)

 
 = Introduzione =

*POSIX = Portable Operating System Interface for uniX*
è una famiglia di standards per le syscalls/APIs degli OS Unix-like proposto dallo IEEE.

Lo scopo dello standard è quello di unificare le precedenti varianti di Unix:
 - System V / SysV / SV / SVID = System V Interface Definition
 - BSD = Berkeley Software Distribution
 
Esistono diverse versioni di POSIX:
 - POSIX.1 = IEEE Std 1003.1-1988, Core Services (incorporates Standard ANSI C)
    - Process Creation and Control
    - Signals
    - Floating Point Exceptions
    - Segmentation Violations
    - Illegal Instructions
    - Bus Errors
    - Timers
    - File and Directory Operations
    - Pipes
    - C Library (Standard C)
    - I/O Port Interface and Control
 - POSIX.2 = IEEE Std 1003.2-1992, Shell and Utilities
    - Command Interpreter
    - Utility Programs
 - POSIX.1b (a.k.a. POSIX.4) = IEEE Std 1003.1b-1993, Real-time extensions
    - Priority Scheduling
    - Real-Time Signals
    - Clocks and Timers
    - Semaphores
    - Message Passing
    - Shared Memory
    - Asynch and Synch I/O
    - Memory Locking Interface
 - POSIX.1c (a.k.a. POSIX.1-1996) = IEEE Std 1003.1c-1995, POSIX Threads extensions
    - Thread Creation, Control, and Cleanup
    - Thread Scheduling
    - Thread Synchronization
    - Signal Handling
 - POSIX.1d = IEEE Std 1003.1d-1999, additional real-time extensions
 - POSIX.1g = IEEE Std 1003.1g-2000, networking APIs including sockets
 - POSIX.1j = IEEE Std 1003.1j-2000, describing advanced real-time extensions
 - POSIX.1-2001 (IEEE Std 1003.1-2001, Single UNIX Specification version 3) (not yet approved by the ISO)
 - POSIX.1-2004 (IEEE Std 1003.1-2004)
 - POSIX.1-2008 (IEEE Std 1003.1-2008, Single UNIX Specification version 4)

NOTA: In questo documento tratteremo solo le funzionalità di POSIX
che non fanno parte della standard library del C.


 == Note legali su POSIX ==
 
Attualmente il detentore del marchio Unix è The Open Group,
un consorzio di industrie ed organizzazioni non-profit.

The Open Group ha affidato a The Austin Group di definire gli standard per le syscall di Unix.

Le specifiche sono in continuo aggiornamento, e sono pubblicate sul sito http://www.opengroup.org
con il nome di *SUS = Single UNIX Specification*.
Una volta pubblicate, vengono approvate dallo IEEE (IEEE Std 1003.1), e quindi dallo ISO (ISO/IEC 9945).
_Il marchio "POSIX" è detenuto dalla IEEE, quindi è sufficiente la sua approvazione per l'estensione dello standard._

OSS.: altri progetti di standardizzazione:
 - XPG = X/Open Portability Guide (outdated, now merged into SUS)
 - LSB = Linux Standard Base


 == Header/Include files ==
 
	#include <unistd.h> // Unix standard library main include file
	#include <dirent.h> // manipolazione delle directories
	#include <fcntl.h> // file control over opened files (locking)
	#include <grp.h> // user group information and control
	#include <pwd.h> // passwd (user information) access and control
	#include <sys/stat.h> // lettura proprietà dei files
	#include <sys/times.h>
	#include <termios.h> // terminal I/O
	#include <sys/wait.h> // macros for reading the status of terminated child processes
	#include <sys/sem.h> // System V-style semaphores
	#include <sys/mman.h> // memory management, shared memory, memory mapped I/O
	#include <sys/utsname.h> // system information
	#include <sys/statvfs.h> // VFS File System information
	#include <utime.h> // edit access and modification times
	#include <tar.h> // magic numbers for the tar archive format
	#include <netdb.h> // functions for translating protocol names and host names into numeric addresses. Searches local data as well as DNS
	#include <netinet/in.h> // internet address families. Widely used on the Internet, these include IP addresses and TCP and UDP port numbers
	#include <netinet/tcp.h>
	#include <fmtmsg.h> // print formatted error messages
POSIX.2-only: (1992)
	#include <regex.h> // espressioni regolari
	#include <fnmatch.h>
	#include <wordexp.h>
	#include <glob.h>
POSIX.1b-only: (1993)
	#include <aio.h> // Asynchronous Input and Output
	#include <mqueue.h> // POSIX message queues (REALTIME)
	#include <sched.h> // process/execution scheduling (REALTIME)
	#include <semaphore.h> // POSIX semaphores (REALTIME)
POSIX.1c-only: (1996)
	#include <pthread.h> // POSIX Threads
UNIX95/SUSv1-only:
	#include <curses.h> // XCURSES = X/Open Curses
	#include <sys/socket.h> // BSD Sockets (XNS = X/Open Networking Services)
	#include <sys/un.h> // local socket address family
	#include <arpa/inet.h> // functions for manipulating numeric IP addresses
	#include <sys/time.h> // time and date functions and structures
	#include <cpio.h> // magic numbers for the cpio archive format
	#include <sys/time.h>
	#include <sys/timeb.h>
	#include <sys/uio.h> // Vectored/Scatter-Gather I/O
	#include <ftw.h> // file tree traversal
	#include <iconv.h> // char codeset conversion
	#include <langinfo.h> // language information
	#include <libgen.h> // pattern matching
	#include <monetary.h>
	#include <ndbm.h> // ndbm database
	#include <nl_types.h>
	#include <poll.h>
	#include <search.h> // search tables
	#include <stropts.h> // STREAMS interface
	#include <strings.h> // more string utilities
	#include <sys/msg.h> // XSI message queues
	#include <sys/ipc.h> // XSI IPC = Inter-Process Communication
	#include <sys/shm.h> // XSI shared memory
	#include <sys/resource.h> // XSI resource operations
	#include <syslog.h> // system error logging
	#include <ucontext.h> // user context
	#include <ulimit.h>
	#include <utmpx.h>
	#include <net/if.h>
UNIX98/SUSv2-only: http://www.unix.org/version2/whatsnew/whatsnew.html
	#include <dlfcn.h> // dynamic linking loading
SUSv3/POSIX.1-2001-only: http://www.unix.org/version3/apis/headers.html
	#include <spawn.h>
	#include <trace.h>
POSIX.1-2003-only:
[...]
SUSv4/POSIX.1-2008-only:
[...]

inclusioni automatiche:
 [...]
 
 
 == Tipi derivati ==

definiti in <sys/types.h>:
	caddr_t = core address
	dev_t = device number
	gid_t = Group-ID
	ino_t = inode number
	key_t = System V IPC
	off_t, loff_t = current file position
	mode_t = bitmask, tipo di file e permessi (vedi "Macros")
	pid_t = Process-ID
	rlim_t = limiti sulle risorse
	sigset_t = insieme di segnali
	ssize_t = numero di bytes
	ptrdiff_t = differenza tra due puntatori
	uid_t = User-ID

da <utime.h>:
	struct utimbuf {
		time_t actime; // last access time
		time_t modtime; // last modification time
	};

da <sys/stat.h>:
	struct stat {
		mode_t    st_mode;    // tipo di file e permessi
		dev_t     st_rdev;    // device ID (if file is character or block special)
	
		dev_t     st_dev;     // ID of device containing file
		ino_t     st_ino;     // inode number
		nlink_t   st_nlink;   // number of hardlinks to the file

		uid_t     st_uid;     // user ID of file
		gid_t     st_gid;     // group ID of file
	
		off_t     st_size;    // file size in bytes (if file is a regular file)
		blkcnt_t  st_blocks;  // number of blocks allocated
		blksize_t st_blksize; // a filesystem-specific preferred I/O block size for this object. In some filesystem types, this may vary from file to file
	
		time_t    st_atime;   // time of last access
		time_t    st_mtime;   // time of last data modification
		time_t    st_ctime;   // time of last status change	
	};

da <dirent.h>:
	DIR = directory stream
	struct dirent {
		ino_t  d_ino; // inode number
		char   d_name[]; // filename
	};


da <termios.h>:
	struct termios {
		tcflag_t  c_iflag; // Input modes
		tcflag_t  c_oflag; // Output modes
		tcflag_t  c_cflag; // Control modes
		tcflag_t  c_lflag; // Local modes
		cc_t      c_cc[NCCS]; // Control characters
	};


 == Macros ==
 
per testare la conformità delle proprie librerie ai vari standard Unix si possono usare
le seguenti *Unix Standards Test Macros* definite in <unistd.h>:

	if defined(unix) || defined(__unix__) || defined(__unix)

	if defined(_POSIX_VERSION)
		if (_POSIX_VERSION == 198808L) // compliant to POSIX.1-1988
		if (_POSIX_VERSION == 199009L) // compliant to POSIX.1-1990
		if (_POSIX_VERSION == 199309L) // compliant to POSIX.1b-1993
		if (_POSIX_VERSION == 199506L) // compliant to POSIX.1-1996
		if (_POSIX_VERSION == 200112L) // compliant to POSIX.1-2001
	
	if defined(_POSIX2_VERSION)
		if (_POSIX2_C_VERSION == 199209L) // compliant to POSIX.2

	if defined(_XOPEN_VERSION)
		if (_XOPEN_VERSION >= 3) // compliant to XPG3 = X/Open Portability Guide 3 (1989)
		if (_XOPEN_VERSION >= 4) // compliant to XPG4 = X/Open Portability Guide 4 (1992)
		if (_XOPEN_VERSION >= 4) && defined(_XOPEN_UNIX) // compliant to SUS = X/Open Single UNIX Specification (UNIX95)
		if (_XOPEN_VERSION >= 500) // compliant to SUSv2 = X/Open Single UNIX Specification, Version 2 (UNIX98)
		if (_XOPEN_VERSION >= 600) // compliant to SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03)

GNULIBC-only?
{
	__STRICT_ANSI__
	_POSIX_SOURCE (obsoleta)
	_POSIX_C_SOURCE = 1 -> POSIX.1
	_POSIX_C_SOURCE = 2 -> POSIX.2
	_BSD_SOURCE (include anche POSIX.2)
	_SVID_SOURCE
	_XOPEN_SOURCE
	_ISOC99_SOURCE
}

File Descriptor Flags da <fcntl.h>:
	FD_CLOEXEC = the descriptor is closed on exec

File Status Flags (per settare la bitmask "mode_t" when opening files, can be read with "fcntl"):
	Access Modes:
		O_RDONLY
		O_WRONLY
		O_RDWR
	Open-time Flags:
		O_CREAT = create file if it does not exist, open it unchanged if it exists (use O_TRUNC to empty it)
		O_CREAT|O_EXCL = create file if it does not exist, fail if exist
		O_TRUNC = truncate file if it exists (must be opened in write mode)
		O_NONBLOCK
		O_NOCTTY
	Operating Modes:
		O_APPEND = no overwrite allowed, seek and you can write only @ the end of file
		O_NONBLOCK

per testare la bitmask "mode_t" (ritornata ad esempio dalla "stat")
da <sys/stat.h>:
	S_ISREG(m) // Test for a regular file. 
	S_ISDIR(m) // Test for a directory. 
	S_ISBLK(m) // Test for a block special file. 
	S_ISCHR(m) // Test for a character special file. 
	S_ISFIFO(m) // Test for a pipe or FIFO special file. 
	S_ISLNK(m) // Test for a symbolic link.

per settare i permessi di "mode_t" (usati da "creat", "chmod" e "umask")
si può utilizzare la notazione ottale,
oppure da <sys/stat.h>:
	S_IRWXU // read, write, execute/search by owner 
	S_IRUSR // read permission, owner 
	S_IWUSR // write permission, owner 
	S_IXUSR // execute/search permission, owner 
	S_IRWXG // read, write, execute/search by group 
	S_IRGRP // read permission, group 
	S_IWGRP // write permission, group 
	S_IXGRP // execute/search permission, group 
	S_IRWXO // read, write, execute/search by others 
	S_IROTH // read permission, others 
	S_IWOTH // write permission, others 
	S_IXOTH // execute/search permission, others 
	S_ISUID // set-user-ID on execution 
	S_ISGID // set-group-ID on execution 
	S_ISVTX // on directories, restricted deletion flag 

reserved names:
 - The header file `dirent.h' reserves names prefixed with `d_'.
 - The header file `fcntl.h' reserves names prefixed with `l_', `F_', `O_', and `S_'.
 - The header file `grp.h' reserves names prefixed with `gr_'.
 - The header file `limits.h' reserves names suffixed with `_MAX'.
 - The header file `pwd.h' reserves names prefixed with `pw_'.
 - The header file `signal.h' reserves names prefixed with `sa_' and `SA_'.
 - The header file `sys/stat.h' reserves names prefixed with `st_' and `S_'.
 - The header file `sys/times.h' reserves names prefixed with `tms_'.
 - The header file `termios.h' reserves names prefixed with `c_', `V', `I', `O', and `TC'; and names prefixed with `B' followed by a digit. 


 == Convenzioni valori di ritorno ==
 
per le funzioni che ritornano valori interi:
	0 = SUCCESS
    -1 = ERROR
 
per le funzioni che ritornano puntatori:
	NULL = ERROR
	!= NULL = SUCCESS

MEMO: controllare anche la variabile globale "errno".


 = I/O =
 
Sono possibili 2 tipi di I/O:
 - High-level (+ portabile, binary/text mode), usando i *file streams* della stdlib -> vedi [c.txt]
 - Low-level (solo in binary mode), usando i *file descriptors* del SO
 
E' possibile combinare l'I/O low level e quello high level con
le seguenti funzioni da <stdio.h>:
	int fileno( FILE* stream ); // return file descriptor of stream
	FILE* fdopen( int fd, char* mode ); // associate a stream with an existing file descriptor
 
 
 == High-level I/O ==
 
L'interfaccia della stdlib è stata mantenuta inalterata.
In POSIX.2 sono state aggiunte alcune funzioni per la manipolazione di stringhe
ed il supporto alle espressioni regolari.
 
 
 === File utilities ===
 
lettura da un file stream con delimitatori (text mode):
	ssize_t getline (char** lineptr, size_t* n, FILE* stream);
	ssize_t getdelim(char** lineptr, size_t* n, int delimiter, FILE* stream);
NOTA: riallocano automaticamente il buffer puntato da "lineptr" se quello passato è troppo piccolo (o se è NULL)
Ritornano il numero di chars letti o "-1" in caso di errore.

esempio:
	// lettura sequenziale delle linee di un file
	FILE* f = fopen("foobar.txt", "r");
	if(!f) exit(1);
	char* line_buffer = NULL;
    size_t line_buffer_len = 0;
    ssize_t read;
	while ((read = getline(&line_buffer, &line_buffer_len, f)) != -1) {
		printf("Retrieved line of length %zd :\n", read);
		printf("%s", line_buffer);
		// ...
	}
	free(line_buffer);
    fclose(f);


 === Strings utilities ===
 
da <string.h>:
	char* strdup( const char* ); // Duplica una stringa in memoria dinamica (_deve_ essere deallocata manualmente con "free")
	void* memccpy( void* dst, const void* src, int c, size_t n ); // come memcpy, ma interrompe la copia al primo byte con valore "c"
case-insensitive string comparisons (da <strings.h>):
    int strcasecmp( char*, char* ); // Confronta la stringa "s1" con "s2" ignorando il casing dei chars
    int strncasecmp( char*, char*, size_t); // Confronta al massimo n caratteri della stringa "s1" con "s2" ignorando il casing dei chars
 ritornano:
  0 se le stringhe sono uguali
  !0 se le stringhe sono diverse
	 >0 se la prima stringa è _maggiore_ della seconda = il primo carattere diverso è maggiore (non c'entra la lunghezza!)
	 <0 se la prima stringa è _minore_ della seconda = il primo carattere diverso è minore (non c'entra la lunghezza!)

Da <fmtmsg.h>:
[...]


 === Command-line args parser ===
 
Da unistd.h:
	int getopt (int argc, char **argv, const char *options);
"options" contiene tutti i caratteri _singoli_ usati come opzioni, elencati uno dopo l'altro senza spazi.
  Non sono ammessi simboli (come ad es. "-").
  Se un'opzione richiede un argomento deve essere seguita da ":".
 
Ad ogni invocazione la funzione ritorna
 the option character for the next command line option,
 oppure "-1" when no more option arguments are available.

NOTE: POSIX demands the following behavior: The first non-option stops option processing.
      This mode is selected by either setting the environment variable POSIXLY_CORRECT or beginning the options  argument string with a plus sign (‘+’)
      Quindi la sintassi usata deve essere:
		program [options] [objects (files, devices, etc.)]

If the option has an argument, getopt returns the argument by storing it in the global variable optarg. You don't ordinarily need to copy the optarg string, since it is a pointer into the original argv array, not into a static area that might be overwritten. 
If getopt finds an option character in argv that was not included in options, or a missing option argument, it returns ‘?’ and sets the external variable optopt to the actual option character. If the first character of options is a colon (‘:’), then getopt returns ‘:’ instead of ‘?’ to indicate a missing option argument. In addition, if the external variable opterr is nonzero (which is the default), getopt prints an error message. 

esempio:
	#include <unistd.h>

	int main (int argc, char **argv)
    {
		int c; // option character returned by getopt
		opterr = 0;
		optind = 1; // index of currently processed arg
		while ((c = getopt (argc, argv, "+abc:")) != -1)
			switch (c)
			{
			   case 'a':
				 // opzione "-a"
				 break;
	
			   case 'b':
				 // opzione "-b"
				 break;
	
			   case 'c':
			     // opzione "-c arg"
				 // l'argomento è puntato da "optarg" SOLO FINO ALLA PROSSIMA INVOCAZIONE DI GETOPT!
				 break;
	
			   case '?':
				 if (optopt == 'c')
				   fprintf (stderr, "Option -%c requires an argument.\n", optopt);
				 else if (isprint (optopt))
				   fprintf (stderr, "Unknown option `-%c'.\n", optopt);
				 else
				   fprintf (stderr,
							"Unknown option character `\\x%x'.\n",
							optopt);
				 break;

			   default:
			     // altro errore
				 abort();
			}

		// reading Non-option arguments
		int index;
		for (index = optind; index < argc; index++)
			printf ("Non-option argument: %s\n", argv[index]);

	// ...

	}     


 === Regular expressions ===
 
[...] -> http://www.gnu.org/software/libc/manual/html_node/Regular-Expressions.html
Da <regex.h> (POSIX.2-only):
	int regcomp(regex_t *restrict compiled, const char *restrict pattern, int cflags);
...


 == Low-level I/O ==

Si basa sull'uso di un *file descriptor* = int non negativo
 che fa riferimento ad una entry nella *open file descriptor table* del processo.

In Unix-like systems, file descriptors can refer to
 files, directories, block or character devices (also called "special files"), sockets, FIFOs (also called named pipes), or unnamed pipes.

I file standard aperti automaticamente sono:
	 STDIN_FILENO = 0
	STDOUT_FILENO = 1
	STDERR_FILENO = 2

apertura di un file:
	int open( char* filename, int flags );
 "flags" è una bitmask che indica la modalità di apertura,
 è ricavata dalle seguenti costanti definite in <fcntl.h>:
access mode:
	O_RDONLY = open for reading only
	O_WRONLY = open for writing only
	O_RDWR = open for reading and writing
open-time flags:
	O_APPEND = scritture eseguite sempre e solo alla fine del file
	O_CREAT = se esiste apre il file specificato, se non esiste ne crea uno vuoto (never fail)
	O_CREAT|O_EXCL = se il file esiste già fallisce, se non esiste ne crea uno vuoto
	O_TRUNC = truncate opened file to 0 bytes (è richiesta apertura in scrittura)
	O_CREAT|O_TRUNC = se esiste tronca il file specificato, se non esiste ne crea uno vuoto (never fail)
?	O_SYNC, O_DSYNC = scritture sincronizzate
?	O_RSYNC = letture sincronizzate
	O_NOCTTY
	O_NONBLOCK = rende l'I/O non-bloccante (può produrre degli errori)
Ritorna il file descriptor dell'oggetto aperto, o "-1" in caso di errore, e setta la variabile globale "errno".

creazione/troncamento ed apertura di un nuovo file vuoto:
	int creat( char* filename, mode_t mode ); // equivalente a: "open( filename, O_WRONLY|O_CREAT|O_TRUNC, mode );"
esempio d'uso:
	// crea o svuota "newfile.txt" nella current working dir con tutti i permessi abilitati
	int fd = creat( "newfile.txt", 0777 ); // i permessi vanno espressi in ottale (prefisso "0"): 777 decimale sarebbe errato

chiusura di un file descriptor:
	int close( int fd );

lettura/scrittura da/su un file descriptor:
	ssize_t read( int fd, void* buf, size_t count ); // legge fino a "count" bytes da "fd" in "buf"
	ssize_t write( int fd, void* buf, size_t count ); // scrive fino a "count" bytes in "fd" da "buf"
Ritornano entrambe il n° di bytes letti/scritti effettivamente.
Ritornano in caso di errore "-1", e settano la variabile globale "errno".
La "read" continua a ritornare "0" se si è raggiunto l'EOF.
NOTA: possono essere bloccanti, ovvero il processo resta bloccato finchè non si è conclusa l'operazione di I/O. -> vedi [os.txt] per gli stati d'esecuzione dei processi
SOL.: usare I/O atomico (garantito con O_APPEND, oppure usando un buffer <= PIPE_BUF)

seeking:
	off_t lseek( int fd, off_t offset, int whence ); // si sposta di "offset" da "whence" (può essere: SEEK_SET=BOF, SEEK_CUR=current, SEEK_END=EOF)
Ritorna il current file position.
Ritorna "-1" in caso di errore (ad es. seeking prima dell'inizio del file).
NOTA: non comportano operazioni di I/O e quindi non sono bloccanti.
esempio d'uso:
	lseek( fd, 0, SEEK_SET ); // si posiziona all'inizio del file = stdlib's "rewind"
	lseek( fd, 0, SEEK_END ); // si posiziona alla fine del file
	off_t pos = lseek( fd, 0, SEEK_CUR ); // ritorna la posizione corrente nel file = stdlib's "ftell"


 == gestione del file system ==

lettura proprietà di un file:
	int stat( char* filename, struct stat* buf );
	int lstat( char* filename, struct stat* buf ); // VARIANTE: non effettua il resolving dei link simbolici
 "filename" è il pathname del file (assoluto o relativo alla dir. corrente)
esempio d'uso:
	// testa se "file" esiste
	struct stat buf;
	if( stat( "file", &buf ) == -1 ) {
		puts("\"file\" does not exist");
	}
	// testa se "dir" esiste ed è una directory
	if( stat( "dir", &buf ) == 0 ) {
		if( S_ISDIR( buf.st_mode ) ) {
			puts("\"dir\" is a directory");
		}
	}

check user's permissions for a file according to its REAL-UID/GID:
	int access( char* filename, int mode );
"mode" è una bitmask ottenuta dalle seguenti costanti:
	R_OK = test for Read permission
	W_OK = test for Write permission
	X_OK = test for eXecute permission
	F_OK = test for File existence

change permissions of a file:
	int chmod( char* filename, mode_t mode );

change owner and group of a file:
	int chown( char* filename, uid_t owner, gid_t group );
    
change access and/or modification times of a file:
	int utime( char* filename, struct utimbuf* times );

hardlink a file:
	int link( char* old_filename, char* new_filename );

unlink/delete a file:
	int unlink( char* filename );

move/rename a file:
	int rename( char* old_filename, char* new_filename );

truncate/expand a file:
	int truncate( char* filename, off_t length );
	int ftruncate( int fd, off_t length ); 


 === gestione delle directories ===
 
create an empty directory:
	int mkdir( char* dirname, mode_t mode );

delete an empty directory:
	int rmdir( char* dirname );

Per leggere il contenuto delle directory, Unix fornisce degli oggetti
tipo *directory stream* "DIR*".
L'ordinamento dei files _non_ è quello alfabetico.

open a directory:
	DIR* opendir( char* dirname );

close a directory:
	int closedir( DIR* dir );
    
read next directory entry:
	struct dirent* readdir( DIR* dir );
Ritorna NULL alla fine della directory od in caso di errore.
NOTA: la struct ritornata è allocata staticamente all'interno della funzione e può essere sovrascritta da chiamate successive.

reset directory stream:
	void rewinddir( DIR *dir );

esempio d'uso:
	// stampa il contenuto di "mydir"
	DIR* mydir = opendir( "mydir" );
	struct dirent* f;
	while( ( f = readdir( mydir ) ) != NULL ) {
		puts( f->d_name ); // stampa il filename corrente
	}
	closedir( mydir );

 
 == I/O models ==
 
Sono possibili diverse modalità di I/O:
 - *blocking I/O* (default)
 - *nonblocking I/O* = le syscalls falliscono invece di bloccare il processo
 - *asynchronous / signal driven* I/O = le syscalls ritornano subito, quando l'I/O è completato il processo può ricevere un segnale
     varie implementazioni: -> non molto portabile
     - System V's (STREAMS + SIGPOLL)
     - BSD's (SIGIO and SIGURG)
     - POSIX' ("aio_read/write")
 - (blocking) *multiplexing I/O* (con "select") = per usare + file descriptors contemporaneamente senza multiprocessing o multithreading
 - memory mapped I/O (con "mmap") = accesso ad un file come un buffer/array in memoria principale. -> puo' semplificare la logica di I/O

per abilitare il nonblocking I/O:
    - If we call open to get the descriptor, we can specify the O_NONBLOCK flag.
    - For a descriptor that is already open, we call fcntl to turn on the O_NONBLOCK file status flag.


 === Asynchronous I/O ===
 
primitive per asynchronous I/O, da <aio.h> (POSIX.1b):
	int 	aio_read( struct aiocb* ); // invia un'operazione di lettura
	int 	aio_write( struct aiocb* ); // invia un'operazione di scrittura
	int 	aio_cancel(int, struct aiocb *); // annulla un'operazione (se non ancora completata)
	int 	lio_listio(int, struct aiocb *const[], int, struct sigevent *); // invia + operazioni di I/O
	int 	aio_suspend(const struct aiocb *const[], int, const struct timespec *); // attende la terminazione di una o più operazioni in maniera sincrona
	int     aio_fsync(int, struct aiocb *); // equivalente di "fsync" asincrono
	ssize_t aio_return(struct aiocb *); // se l'operazione è terminata ritorna il suo return value
	int     aio_error(const struct aiocb *); // se l'operazione è terminata e fallita ritorna il valore di errno (oppure "EINPROGRESS") -> utile per monitorare lo stato dell'operazione
Ogni operazione di I/O è descritta con una "struct aiocb":
	struct aiocb {
		int             aio_fildes;     // file descriptor (aperto mediante "open")
		off_t           aio_offset;     // file starting offset
		void*           aio_buf;        // buffer to read/write from
		size_t          aio_nbytes;     // number of bytes to read/write
		struct sigevent aio_sigevent;   // signal to deliver on completion
		int             aio_lio_opcode; // operation to be performed (lio_listio only)
		int             aio_reqprio;    // AIO priority
	};
	struct sigevent {
		int                      sigev_notify;            // notification type (SIGEV_NONE = no signal)
		int                      sigev_signo;             // signal number
		union sigval             sigev_value;             // signal value
		void (*sigev_notify_function)(union sigval);      // notification function
		pthread_attr_t*          sigev_notify_attributes; // notification attributes
	};
NOTA: NON è possibile riutilizzare la stessa istanza di "struct aiocb" per + operazioni di AIO!
NOTA: è possibile alternare operazioni AIO con operazioni di I/O blocking.
Tuttavia è necessario:
 - attendere che tutte le operazioni di AIO siano terminate
 - resettare il file position offset con "lseek"

 
 === Multiplexing I/O ===

Per consentire ad un programma di attendere la disponibilità di dati su + descrittori contemporaneamente ci sono varie soluzioni:
 - blocking I/O + multiprocessing / multithreading -> ogni processo/thread gestisce un descrittore separato
 - asynchronous I/O
 - multiplexing I/O
 
L'I/O multiplexing è realizzato con la funzione "select" in <sys/select.h> (o <sys/time.h>)
che testa più file descriptors per verificare la loro disponibilità immediata ad effettuare I/O o la presenza di eccezioni:
a descriptor is considered "ready" if a read/write from that descriptor won't block, or if an exception condition is pending on that descriptor.

NOTA: questa funzione può controllare ogni tipo di descrittore, comunque il suo uso principale è per il network I/O.
(File descriptors for regular files always return ready for reading, writing, and exception conditions)
 
Nella invocazione della "select" bisogna definire:
 - *descriptors set* da controllare
 - *condizioni* da controllare (read/write/exceptions)
 - tempo di attesa (definito/indefinito)

Il tempo di attesa è descritto mediante la
	struct timeval {
		long tv_sec;     /* seconds */
		long tv_usec;    /* and microseconds */
	};

Un descriptors set è rappresentato con il tipo "fd_set" =
una bitmask in cui ogni bit coincide con un file descriptor.
(ad es. il 16° bit <-> file descriptor no 16)
primitive gestione descriptors sets: 
	void FD_ZERO(fd_set *fdset); // inizializza
	void FD_SET(int fd, fd_set *fdset);
	void FD_CLR(int fd, fd_set *fdset);
	int FD_ISSET(int fd, fd_set *fdset); // Returns: nonzero if fd is in set, 0 otherwise
esempio d'uso:
	fd_set ds;
	FD_ZERO( &ds );
	FD_SET( STDIN_FILENO, &ds );

prototipo "select":
	int select( int maxfdp1, fd_set* readfds, fd_set* writefds, fd_set* exceptfds, struct timeval* tvptr );
"tvptr" indica il tempo di attesa:
	NULL -> wait forever
	tvptr->tv_sec == 0 && tvptr->tv_usec == 0 -> non-blocking / Don't wait at all. All the specified descriptors are tested, and return is made immediately.
	tvptr->tv_sec != 0 || tvptr->tv_usec != 0 -> blocking / Wait the specified number of seconds and microseconds. Return is made when one of the specified descriptors is ready or when the timeout value expires.
"readfds, writefds, exceptfd" possono essere NULL.
"maxfdp1" = "maximum file descriptor plus 1", oppure "FD_SETSIZE".
Ritorna:
 -1  = errore -> vedi errno
  0  = no descriptors are ready (within the time limit specified)
  >0 = the number of descriptors that are ready = the sum of the descriptors ready in all three sets
Inoltre i descriptor sets sono modificati in modo che i bits ancora settati corrispondano ai descrittori ready. -> possono essere testati con "FD_ISSET"
esempio:
	fd_set readset, writeset;
	FD_ZERO(&readset);
	FD_ZERO(&writeset);
	FD_SET(0, &readset);
	FD_SET(3, &readset);
	FD_SET(1, &writeset);
	FD_SET(2, &writeset);
	select(4, &readset, &writeset, NULL, NULL);
	if( FD_ISSET(3, &readset) ) // file descriptor n.3 ready for read

 
 === Memory mapped I/O ===
 
Mappa un file in un buffer di memoria.
Può facilitare la logica di I/O in quanto il file può essere acceduto come un array.

Da <sys/mman.h>:
	void* mmap( void* addr, size_t len, int prot, int flag, int fd, off_t off );
The return value of this function is the starting address of the mapped area,
oppure "MAP_FAILED".
"addr" = indirizzo iniziale del segmento, spesso è "0" = let the system choose
"len" = lunghezza del segmento/file -> può essere ricavato con "fstat"
"off" = offset iniziale del file, spesso è "0"
"prot" = protection, può essere:
	PROT_NONE
	oppure una bitmask di:
		PROT_READ
		PROT_WRITE
		PROT_EXEC
NOTA: cmq è subordinata alla modalità di apertura del file
"fd" = file descriptor del file da mappare
"flag" può essere:
	MAP_SHARED = l'I/O è effettuato direttamente su file
	MAP_PRIVATE = l'I/O avviene su di una copia in memoria del file
	...

esempio d'uso:
	// apre il file
		int fd = open("filename", O_RDONLY);
	// legge la dimensione
		struct stat statbuf;
		fstat(fd, &statbuf);
	// crea la mappatura
    void* fdmap = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if( fdmap == MAP_FAILED )
        puts("mmap error");

 
 = Processi =

Vedi [os.txt] per una introduzione ai processi.

ciclo di vita:
 - nascita (da un altro processo)
 - duplicazione/riproduzione = creazione di uno o più processi figli
 - terminazione/exit

In Unix ogni processo ha associati nella sua *process table* entry:
 - *PID = Process ID* (univoco) > 1 (riservato ad "Init")
 - *PPID = Parent PID*
 - *real UID = User ID, GID = Group ID* (spesso quello dell'utente che lo esegue)
 - *effective UID, GID* (spesso uguali al real-UID/GID), che ne determina i permessi
 - *saved set-UID, GID* = all'owner del file se il flag set-UID/GID è abilitato (spesso uguali al effective-UID/GID) -> può essere cambiato solo da root
 - environment / ambiente d'esecuzione
 - open file descriptor table
 - umask = file creation mask
 - file locks
 - pending signals
 - signal mask = segnali bloccati
 - *PGID = Process Group ID*, *SID = Session ID* (di default = al PPID)
[...]
 
In Unix i processi sono organizzati in una struttura gerarchica ad albero (come i file).
Ogni processo viene generato da un altro processo.
Il primo processo che genera tutti gli altri è "init" (PID=1).
Ogni processo può generare uno o più *processi figli*,
inizialmente identici a se stesso.

Un processo può terminare in modo:
 - normale (con "exit", "_exit", return da "main"), con un *exit status*
 - anormale (con "abort", oppure alla ricezione di un segnale)
In entrambi i casi il SO mantiene il suo *termination status* finchè non viene letto dal padre.

2 casi:
 - se il padre termina prima del figlio -> il figlio diventa "orfano", viene adottato da "init" (il suo PPID=1) ed è rimosso dal SO non appena termina.
 - se il figlio termina prima del padre:
    - il padre riceve un segnale "SIGCHLD" (default handler = ignore)
    - se il padre "attende" la sua terminazione e legge il suo *termination status*,
	   il processo viene rimosso definitivamente dal sistema
    - se il padre non attende la sua terminazione,
	  il figlio diventa uno *zombie*:
	   il SO mantiene alcune sue info (tra cui il termination status) finchè il padre non le legge. -> PROBLEMA: spreco di PIDs
metodi per evitare la creazione di processi zombie:
 - signal(SIGCHLD, SIG_IGN);
 - struct sigaction sa; sa.sa_flags = SA_NOCLDWAIT; sigaction( SIGCHLD, &sa, NULL );
 - *double fork method*


 == Primitive gestione processi ==
 
lettura proprietà del processo corrente:
	pid_t getpid();
	pid_t getppid();
	uid_t getuid();
	gid_t getgid();
	uid_t geteuid();
	gid_t getegid();
NOTA: non è possibile leggere il saved set-UID/GID (cmq inizialmente è uguale all'effective UID/GID, e solo root può cambiarlo).

modifica proprietà del processo corrente:
	int setuid( uid_t );
	int setgid( gid_t );
invocato da root: cambia real, effective e saved set-UID/GID
invocato non da root: può solo ripristinare l'effective UID/GID = real UID/GID oppure saved set-UID/GID
Altrimenti fallisce!

creazione nuovo processo:
	pid_t fork();
La "fork" determina una "biforcazione" nel flusso d'esecuzione del processo che la invoca.
Il processo che la invoca è detto *parent process*.
Il nuovo processo è detto *child process*.
Ritorna:
 - nel figlio: "0"
 - nel padre: il PID (>0) del nuovo processo figlio, o "-1" in caso di errore
(Il valore di ritorno quindi può essere usato per differenziare il comportamento dei due processi.)
Subito dopo la chiamata entrambi condividono lo stesso codice e la stessa memoria.
Questo ha un duplice vantaggio:
 - overhead minimo per la syscall
 - condivisione della memoria automatica tra processo padre e figlio
Dopo la "fork", la memoria di entrambi i processi segue la politica del
*COW = Copy On Write* = viene copiata e resa privata solo se modificata.
Quindi vengono ereditati anche:
 - open file descriptor table (i descrittori aperti nel padre valgono anche nel figlio)
 - real-UID, real-GID, effective-UID, effective-GID, saved set-UID, saved set-GID
 - group ID, session ID
 - environment, current working dir, root dir
 - umask = file creation mask
 - _tutti_ i signal handler settati, SIG_IGN compreso (vedi paragrafo sui segnali)
 - signal mask = i segnali bloccati temporaneamente dal processo
 - memory mapped segments
 - XSI IPC structures instances con IPC_PRIVATE key (vedi paragrafo "XSI IPC structures")
 - socket descriptors
NON vengono mai ereditati:
 - (NOTA: i custom signal handlers _sono_ ereditati dalla "fork"; vengono resettati al default solo dalla "exec")
 - pending signals set
 - file locks
OSS.: di default padre e figlio condividono gli stessi stdin, stdout e stderr.
esempio d'uso:
	// shared code
	pid_t child_pid = fork();
	if( child_pid < 0 )
	{
		// fork error handling
		// ...
	}
	else if( child_pid == 0 )
	{
		// child process only code
		// ...
	}
	else if( child_pid > 0 )
	{
		// parent process only code
		// ...
	}
	// shared code
esempio d'uso:
	pid_t child_pid = fork();
	switch( child_pid )
	{
		case( -1 ):
		{
			... // fork error handling
			break;
		}

		case( 0 ):
		{
			... // child process only code
			break;
		}

		default:
		{
			... // parent process only code
			break;
		}
	}

attesa _sincrona_ terminazione di un figlio specifico, da <sys/wait.h>:
	pid_t waitpid( pid_t child_pid, int* status, int options );
Ritorna subito errore "-1" se non c'è nessun figlio.
"status" contiene l'exit code del figlio. (Se non è d'interesse può anche essere NULL.)
"options" può essere:
    0 = bloccante
	WNOHANG = non-bloccante
	WCONTINUED
	WUNTRACED

attesa _sincrona_ terminazione di un figlio qualsiasi, da <sys/wait.h>:
	pid_t wait( int* status ); // equivalente a: "waitpid( -1, status, 0 )"
Ritorna il PID del figlio che ha terminato.
NOTA: è bloccante!
esempio d'uso:
	// attesa _asincrona_ terminazione di un figlio qualsiasi con un signal handler per SIGCHLD
	void free_child( int signum )
	{
		signal( SIGCHLD, &free_child ); // ristabilisce il signal handler (necessario per unreliable signals)

		pid_t terminated_child_pid;
		int exit_code;
		terminated_child_pid = wait( &exit_code );
		printf("child with PID=%d terminated with exit code=%d\n", terminated_child_pid, exit_code);
		// ALTERNATIVA: WEXITSTATUS(exit_code)
	}
	signal( SIGCHLD, &free_child );
? NOTA: a seconda del SO i segnali dello stesso tipo possono essere accodati o gestiti con un'unica invocazione del signal handler.
? Quindi in questo modo si potrebbe perdere il termination status di alcuni figli.
esempio d'uso:
	pid_t pid = fork();
	if (pid == -1)
	{
		perror("fork()");
	}
	/* parent */
	else if (pid > 0)
	{
		int status;

		printf("Child has pid %ld\n", (long)pid);

		if (wait(&status) == -1)
		{
		  perror("wait()");
		}
		else
		{
		  /* did the child terminate normally? */
		  if(WIFEXITED(status))
		  {
			printf("%ld exited with return code %d\n",
				   (long)pid, WEXITSTATUS(status));
		  }
		  /* was the child terminated by a signal? */
		  else if (WIFSIGNALED(status))
		  {
			printf("%ld terminated because it didn't catch signal number %d\n",
				   (long)pid, WTERMSIG(status));
		  }
		}
	}
	/* child */
	else
	{
		sleep(10);
		exit(0);
	}


loading programmi:
    int execl( char* path, char* arg0, ... );
    int execv( char* path, char** argv );
	int execle( char* path, char* arg0, ..., char** envp);
	int execve( char* path, char** argv, char** envp);
	int execlp( char* path, char* arg0, ... );
    int execvp( char* path, char** argv );
Questa famiglia di funzioni effettua il caricamento in memoria di un programma
sostituendo completamente l'immagine del processo corrente (codice e dati).
{
Ritorna solo in caso di errore "-1".
Se ha successo non ritorna nulla al chiamante perchè l'immagine del processo è sostituita.
Quindi non è necessario testare il valore di ritorno, se ritorna si è verificato un errore.
}
function names postfix:
	p = PATH environment variable is used to find the file to be executed
    l = list of command-line arguments are passed individually to the function (variadic)
    v = vector = command-line arguments are passed to the function as an array of strings
	e = environment arguments is explicitly passed to the child process as an array of strings
"path" = pathname completo dell'eseguibile (ad es. "/bin/ls"), oppure può essere uguale a "arg0"
"arg0" = il nome del file eseguibile
"envp" = array di stringhe contenenti le variabili d'ambiente (del tipo "name=value") - può essere letto dalla variabile globale "environ"
Vengono ereditati:
 - PID, PPID (invariati)
 - real-UID, real-GID (effective-UID, effective-GID possono variare)
 - group ID, session ID
 - environment, current working dir, root dir
 - umask = file creation mask
 - segnali settati a SIG_IGN (gli altri vengono resettati a SIG_DFL)
 - pending signals
 - signal mask = i segnali bloccati
 - file locks, open file descriptor table
NON vengono ereditati:
 - opened file descriptors with FD_CLOEXEC flag set -> chiusi automaticamente
OSS.: di default padre e figlio condividono gli stessi stdin, stdout e stderr.
esempio d'uso:
	// esegue il comando "ls -l" cercando l'eseguibile nel PATH di sistema
	pid_t pid = fork();
	if( pid == 0 ) {
		execlp( "ls", "ls", "-l", (char *)0 );
		// error handling - we're here only if execl fails
		perror("execl");
		exit(1);
	}
	// parent process continues normal execution here


 == Ambiente d'esecuzione ==

L'ambiente d'esecuzione di un processo è costituito da un insieme di variabili
di tipo stringa aventi ciascuna un proprio nome ed un proprio valore.
Tra queste vi sono sempre:
 - PATH
 - current working directory
 - root dir
[...]

Le variabili d'ambiente sono accessibili mediante l'array di stringhe globale
definito in <unistd.h>:
	extern char** environ;
Ogni stringa contiene una definizione del tipo "name=value".

ricerca e lettura di una variabile di sistema,
da <stdlib.h> (libc):
 	char* getenv( char* variable_name );
Ritorna NULL se non esiste.

aggiunta/modifica di una variabile di sistema,
da <stdlib.h> (POSIX.1-2001):
	int putenv( char* declaration );
	int setenv( char* name, char* value, int overwrite );
[...]

get and change current working directory, da <unistd.h>:
	char* getcwd( char* buf, size_t size ); // get 
	int chdir( char* path ); // change current working directory

imposta la *file creation mask* del processo corrente = i permessi di default _da negare_ ad ogni _nuovo_ file creato dal processo.
da <sys/stat.h>:
	mode_t umask( mode_t );
esempio:
	umask( 022 ); // nega i permessi di scrittura a group e other

 
 == IPC = Inter-Process Communication ==

Il SO fornisce varie tecniche per far comunicare due o più processi:
 - file sharing + locking (sconsigliato)
 - pipes (la più vecchia e diffusa)
 - signaling
 - XSI IPC structures
    - message passing / Message queue
    - semafori
    - memoria condivisa
 - sockets (per processi remoti)
 
 
 === File sharing ===

I file sono oggetti condivisi (più processi possono accedervi contemporaneamente),
quindi possono essere usati come una forma di IPC.

nel kernel abbiamo:
  - *process table* (contenente tutti i processi attivi nel SO)
     in ogni entry:
     - *open file descriptor table* -> _the file descriptors index the entries in this table_
	    in ogni entry:
	    - file descriptor flags
	    - file table entry pointer
	 [...] -> altre info, vedi "Processi"
 - *file table* (contenente tutti i files aperti dai processi) - OSS.: lo stesso file fisico può avere più entries se più processi lo aprono contemporaneamente.
    in ogni entry:
     - file status flags
     - current offset
     - vnode table pointer
  - *vnode table* (contiene info sui file aperti: l'inode, i permessi, etc.) - OSS.: max 1 sola entry per ogni file fisico

get/set the file descriptor flags (process-wide), da <fcntl.h>:
	int fcntl( int fd, F_GETFD ); // get
	int fcntl( int fd, F_SETFD, long arg ); // set
"arg" può essere solo:
	FD_CLOEXEC = close on exec

get/set the file status flags (system-wide), da <fcntl.h>:
	int fcntl( int fd, F_GETFL ); // get
	int fcntl( int fd, F_SETFL, long arg ); // set
"arg" è una bitmask costituita con:
	O_APPEND = append on each write
	O_NONBLOCK
	O_SYNC = wait for writes to complete (data and attributes)
	O_DSYNC = wait for writes to complete (data only)
	O_RSYNC = synchronize reads and writes
 
OSS.: chiarimento sul termine "file descriptor":
 I "veri" file descriptor sono delle strutture del kernel e non sono visibili in user-space.
 I "file descriptors" comunemente gestiti dai programmi sono degli interi non negativi (0 = stdin) che indicizzano le entries della open file descriptor table.
 Le funzioni di I/O di basso livello necessitano di questi indici per accedere ai file descriptors veri e propri nel kernel.
 Si usa chiamare impropriamente con il termine "file descriptor" anche gli indici della open file descriptor table.

Più processi possono accedere allo stesso file in due modi:
 - aprendo lo stesso file individualmente -> si creano 2 file table entries distinte, ognuna con il proprio offset -> rischio interferenze/sovrascritture
 - duplicando il file descriptor -> i 2 file descriptor puntano alla stessa file table entry, quindi l'offset è condiviso
  
*duplicazione di un file descriptor*
 = crea una nuova entry nella open file descriptor table del processo
che punta alla stessa file table entry del vecchio
(i due file descriptor condivideranno lo stesso offset e file status flags).
-> avviene automaticamente con la "fork".
Può essere anche effettuato manualmente mediante:
da <fcntl.h>:
	int fcntl( int old_fd, F_DUPFD, int new_fd );
da <unistd.h>:
	int dup( int old_fd ); // seleziona il più basso file descriptor libero. Equivalent to: "fcntl( old_fd , F_DUPFD, 0 )"
	int dup2( int old_fd, int new_fd ); // ricicla un file descriptor già aperto. Equivalent to: "close( new_fd ); return fcntl( old_fd, F_DUPFD, new_fd );"
Ritornano il nuovo file descriptor, oppure "-1" in caso di errore.

Il file sharing può essere ottenuto in diversi modi:
 - tra processi imparentati:
    - con la fork, un figlio eredita dal padre tutta la sua open file descriptor table
 - tra processi _non_ imparentati:
    - più processi aprono lo stesso file con la "open" (la "open" crea sempre una nuova entry nella file table) -> si creano 2 file table entries distinte, ognuna con il proprio offset -> rischio interferenze/sovrascritture
    - *passing file descriptors*: un processo server apre il file, quindi invia il file descriptor agli altri processi mediante un meccanismo di IPC. Gli altri processi "importano" il file descriptor con una syscall apposita (ad es. "ioctl" con "I_RECVFD").
L'ultimo metodo è noto anche come "passing access rights" ed è necessario se per motivi di sicurezza gli altri processi non hanno i privilegi di accesso al file.


 ==== File locking ====
 
Il locking permette ad un processo di restringere l'accesso ad un file su cui sta operando / accedere ad un file in modo esclusivo.

E' _necessario_ nei seguenti casi:
 - per *serializzare* più operazioni di I/O di un file, evitando così che la loro esecuzione possa essere interfogliata con altre operazioni di I/O sullo stesso file eseguite da altri processi.
 - più in generale, per evitare interferenze con gli altri processi in esecuzione sul SO, che altrimenti sono sempre liberi di aprire lo stesso file e modificarlo a piacimento.

NON è necessario:
 - per i file descriptors duplicati/ereditati con la fork

NOTA: File locks are based on inode instead of file name, since UNIX allows multiple names to refer to the same file.
Ciononostante, _usually file locks set by the parent process are not inherited by the child process._
??? NOTA2: su Unix i lock non sono ricorsivi = non possono essere acquisiti più volte -> deadlock?

NOTA3: in Unix di default i lock sono di tipo *advisory / cooperative*, quindi _non_ impediscono effettivamente agli altri processi di accedere ai file bloccati (!= Windows).

Sono supportati due tipi di locking:
 - *read / shared locking* = tutti possono leggere, nessuno può scrivere (impedisce il write locking agli altri processi)
 - *write / exclusive locking* = solo il locker può leggere e scrivere (impedisce il read/write locking agli altri processi)

E' possibile bloccare l'intero file o solo una parte/dei records.
 
record/file locking, da <fcntl.h>:
	int fcntl( int fd, int cmd, struct flock* ldata );
"cmd" può essere:
	F_SETLK = Set or clear a lock
	F_SETLKW = Set or clear a lock + Wait if already locked
	F_GETLK = Get the first lock -> ritorna info sul processo che attualmente possiede il lock
"ldata" è così definita in <fcntl.h>:
	struct flock {
		short l_type;   // type of lock. Can be: F_RDLCK=shared, F_WRLCK=exclusive, F_UNLCK
		short l_whence; // Can be: SEEK_SET=BOF, SEEK_CUR=current, SEEK_END=EOF
		off_t l_start;  // relative offset in bytes
		off_t l_len;    // size; if 0 then until EOF
		pid_t l_pid;    // usato con F_GETLK: process ID of the process holding the lock
	};
esempio d'uso:
	// setta un write lock su tutto il file
	int fd = ...
	struct flock fl;
		fl.l_type = F_WRLCK; // write lock
		fl.l_whence = SEEK_SET; // from BOF=Beginning Of File
		fl.l_start = 0; // offset = 0
		fl.l_len = 0; // until EOF
	fcntl( fd, F_SETLK, &fl );
	// ...
	// rimuove il lock
		fl.l_type = F_UNLCK;
	fcntl( fd, F_SETLK, &fl );

ALTERNATIVA record/file locking, da <unistd.h>:
	int lockf( int fd, int function, off_t size );
"function" può essere:
	F_ULOCK = unlock
	F_LOCK = lock for exclusive use
	F_TLOCK = test and lock for exclusive use
	F_TEST = test for locks by other processes
"size" indica il n° di bytes da bloccare _dalla posizione corrente del file_.
 Può anche essere un numero negativo.
 If size is 0, the section from the current offset through the largest possible file offset is locked (that is, from the current offset through the present or any future end-of-file).

NOTA: esiste anche una terza alternativa da BSD, ma non è supportata in POSIX.
Da <sys/file.h>:
	int flock( int fd, int operation );
[...]

NOTA: Whatever locking mechanism you use, it is important to sync all your file IO while the lock is active.

OSS.: per bloccare un file è sempre necessario aprirlo _prima_.

I lock vengono mantenuti finchè:
 - viene eseguito il relativo unlock (consigliato)
 - il file viene chiuso
 - il processo termina


 === Pipes ===

Le pipes sono la forma di IPC più antica e diffusa su Unix.
Le pipes però hanno 2 limitazioni:
 - sono half-duplex = comunicazione unidirezionale
 - i processi che comunicano devono essere imparentati
Normalmente i processi che comunicano sono 2 (1 scrittore ed 1 lettore),
ma è possibile anche avere più scrittori sulla stessa pipe.

uso tipico:
 1. il parent process crea la pipe
 2. fa fork
 3. a seconda del senso della comunicazione parent e child chiudono l'estremo della pipe che non utilizzano
 4. avviene la comunicazione

creazione di una pipe, da <unistd.h>:
	int pipe( int filedes[2] );
Le 2 estremità della pipe sono rappresentate da 2 file descriptor:
 - filedes[0] = l'estremo di input read-only
 - filedes[1] = l'estremo di output write-only
Ritorna "0" in caso di successo o "-1" in caso di errore.

L'I/O sulla pipe è effettuato con le stesse primitive per i file.

Se l'altro estremo della pipe è stato chiuso:
 - la "read" ritorna sempre "0"
 - la "write" ritorna "-1", setta errno=EPIPE, e si genera il segnale SIGPIPE

La costante "PIPE_BUF" indica la dimensione del *kernel pipe buffer*.
Per garantire l'atomicità delle scritture nella pipe
(ed evitare l'interleaving se ci sono più processi scrittori)
non bisogna scrivere più bytes di quanti il buffer ne possa contenere.

esempio d'uso:
	// Send data from parent to child over a pipe
	int fd[2];
    pipe( fd );
    pid_t pid = fork();
    if( pid > 0 ) {       /* parent */
        close( fd[0] ); // close read-end of pipe
        write( fd[1], "hello world\n", 12 );
    } else {                /* child */
        close( fd[1] ); // close write-end of pipe
		char msg[MAXLINE];
        read( fd[0], msg, MAXLINE );
        puts( msg );
    }

E' possibile ridirezionare nella pipe lo stdin/out di un processo.
Per questo scopo tuttavia è più semplice usare la "popen"/"pclose":
	FILE* popen( const char* cmdstring, const char* type );
	int pclose( FILE* fp ); // chiude la pipe e ritorna l'exit status del comando
Ad es.:
	FILE* cmdstdout = popen( cmd, "r" ); // esegue "cmd" nella shell e ridireziona il suo stdout
	FILE*  cmdstdin = popen( cmd, "w" ); // esegue "cmd" nella shell e ridireziona il suo stdin

Se sia lo stdout che lo stdin del processo figlio sono ridirezionati con 2 pipes distinte,
il processo figlio è detto *coprocesso*.
In questo caso non si possono usare le utilities delle stdlib, ma bisogna ridirezionare gli stdin/out del figlio con la "dup2".
esempio:
	int fd1[2], fd2[2];
	pipe(fd1);
	pipe(fd2);
	pid_t pid = fork();
	if (pid > 0) {       /* parent */
	    close(fd1[0]);
        close(fd2[1]);
		// ...
		// scrive l'input al figlio e legge il suo output
		// ...
	
	} else {                                  /* child */
        close(fd1[1]);
        close(fd2[0]);
        if(fd1[0] != STDIN_FILENO) {
            if (dup2(fd1[0], STDIN_FILENO) != STDIN_FILENO)
                err_sys("dup2 error to stdin");
            close( fd1[0] ); /* don't need this after dup2 */
        }
        if (fd2[1] != STDOUT_FILENO) {
            if (dup2(fd2[1], STDOUT_FILENO) != STDOUT_FILENO)
                err_sys("dup2 error to stdout");
            close(fd2[1]);
        }
		exec( ... ); // OSS.: mantiene le ridirezioni!
	}

VARIANTI: -> non molto portabili
 - *named pipes / FIFOs / well-known pipes* -> superano la seconda limitazione, ma sono sempre half-duplex
 - *s-pipes / STREAMs* = unnamed, full duplex

creazione di una FIFO, da <sys/stat.h>:
	int mkfifo ( char* filename, mode_t mode );
"mode" segue le stesse regole per la "open".
Una volta creata, viene gestita come un file:
 - i processi che devono scrivere su di essa la aprono con "open( filename, O_WRONLY)"
 - i processi che devono leggere su di essa la aprono con "open( filename, O_RDONLY|O_NONBLOCK)"
Per la rimozione si può usare la "unlink".


  === Gestione dei segnali ===

[...] -> vedi [c.txt] per una introduzione ai segnali
  
I segnali possono essere utilizzati come una forma di comunicazione _asincrona_ tra processi.
L'informazione veicolata è sempre di tipo binaria (un segnale c'è o non c'è).
NOTA: per poter comunicare inviandosi segnali, è necessario che 2 processi abbiano i medesimi privilegi.

Un segnale può trovarsi in diversi stati sequenziali:
 1. generated
 2. pending = non ancora consegnato e/o catturato
 3. delivered / caught = when the associated action for a signal is taken
Un processo può "bloccare" temporaneamente la consegna di uno o più segnali (che restano nello stato "pending").

Ad ogni processo sono associati:
 - signal handlers = funzioni utente designate alla gestione dei segnali
 - pending signals mask
 - una *signal mask* = insieme dei segnali temporaneamente "bloccati"

LIMITAZIONI:
 - POSIX non specifica se i segnali dello stesso tipo debbano essere accodati oppure se accorpati (di solito sono accorpati).
 - POSIX non specifica l'ordine in cui devono essere consegnati.
 - non sono thread-safe
 - non sono real time = la consegna può essere protratta per tempo indefinito
SOLUZIONE: *real-time signals* (POSIX.1b-only) -> [...]

NOTA: Automatic restart of interrupted system calls
Se il processo cattura un segnale con un suo custom handler durante l'esecuzione di una syscall, può succedere:
 - con syscalls "lente" = che possono bloccarsi per tempo indefinito (ad es. read, write e open con IPCs, pause e wait)
    -> la syscall viene interrotta, ritorna errore, e setta errno=EINTR.
    -> con la nuova interfaccia POSIX è possibile impostare il riavvio automatico. Altrimenti è opzionale!
 - con altre syscalls: il signal handler va in esecuzione _dopo_ la syscall

I segnali sono rappresentati come delle costanti intere > 0 e < 32.
Un insieme di segnali può essere rappresentato con il tipo "sigset_t".

POSIX estende le funzionalità di gestione dei segnali fornite dalla
C standard library in <signal.h>, e dichiara i seguenti segnali standard:

segnali di terminazione software: (azione di default=terminazione processo) -> signal handler tipico: pulizia file temporanei, chiusura sockets, etc.
	SIGABRT = Process abort signal -> generato dalla "abort"
	SIGTERM = Termination signal -> inviato da "kill"
	SIGINT = Terminal interrupt signal -> generato con "Ctrl-C" su console
	SIGQUIT = Terminal quit signal -> generato con "Ctrl-\" su console
	SIGHUP = Hangup -> terminale disconnesso
PARTICOLARI: SIGKILL, SIGSTOP -> CANNOT BE CAUGHT OR IGNORED: SIGKILL causa cmq la terminazione del processo, SIGSTOP la sua sospensione (fino a SIGCONT)

segnali hardware exceptions: (azione di default=terminazione processo) -> signal handler tipico: pulizia file temporanei, chiusura sockets, etc.
	SIGILL = Illegal instruction
	SIGFPE = Erroneous arithmetic operation (e.g. division by 0)
	SIGSEGV = Invalid memory reference (segmentation fault)
NOTA: anche se catturati, il programma potrebbe non essere in grado di proseguire l'esecuzione.

segnali errori software: (azione di default=terminazione processo)
	SIGPIPE = Broken pipe: write to pipe with no readers
	SIGALRM = Alarm clock -> generato dalla "alarm" (vedi sotto)

segnali di comunicazione IPC: (default action: ignore)
	SIGCHLD = Child process terminated or stopped -> signal handler tipico: chiama "wait" per identificare il figlio, ne legge l'exit status, etc.
altri segnali: (default action: process termination)
	SIGUSR1 = User-defined signal 1
	SIGUSR2 = User-defined signal 2
NOTA: possono essere usati per la comunicazione IPC

altri segnali: (default action: ignore)
	0 = null signal -> utile per testare se un processo è "vivo" o meno!
	SIGCONT = Continue executing, if stopped

altri segnali: (default action: suspend process)
	SIGTSTP = Terminal stop signal -> generato con "Ctrl-Z" su console
	SIGTTIN = Background process attempting read
	SIGTTOU = Background process attempting write

{
POSIX.1-2001-only:
	SIGBUS 		ii 	Access to an undefined portion of a memory object
	SIGPOLL 	i 	Pollable event
	SIGPROF 	i 	Profiling timer expired
	SIGSYS 		ii 	Bad system call
	SIGTRAP 	ii 	Trace/breakpoint trap
	SIGURG 		iii High bandwidth data is available at a socket
	SIGVTALRM 	i 	Virtual timer expired
	SIGXCPU 	ii 	CPU time limit exceeded
	SIGXFSZ 	ii 	File size limit exceeded
default actions:
	i = Abnormal termination of the process. The process is terminated with all the consequences of _exit() except that the status made available to wait() and waitpid() indicates abnormal termination by the specified signal. 
	ii = Abnormal termination of the process. Additionally, implementation-dependent abnormal termination actions, such as creation of a core file, may occur. 
	iii = Ignore the signal. 
	iv = Stop the process. 
	v = Continue the process, if it is stopped; otherwise ignore the signal. 
}


 ==== vecchia interfaccia/semantica obsoleta ====
 
registrazione signal handler:
	typedef void sighandler_t(int);
	sighandler_t* signal( int signo, sighandler_t* handler );
Ritorna il signal handler precedentemente associato,
oppure SIG_ERR = errore.
NOTA: e' possibile associare lo stesso signal handler a più segnali.
esempio d'uso:
	// ignora il segnale SIGUSR1
	if (signal(SIGUSR1, SIG_IGN) == SIG_ERR)
        puts("cannot ignore SIGUSR1");
	// cattura il segnale SIGINT solo se non è attualmente ignorato (utile per programmi interattivi)
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
		signal(SIGINT, sig_int);
LIMITAZIONE: non può leggere l'associazione attuale senza modificarla!

invio sincrono di un segnale ad un altro processo:
	int kill( pid_t target, int signo ); // invio di un segnale ad un processo (o ad un gruppo di processi)
"target" può essere:
 >0  = singolo processo
 <-1 = gruppo di processi con PGID = "-target" (vedi sotto "Process groups and sessions")
 =0  = tutti i processi nello stesso gruppo del caller
 -1  = tutti i processi (con cui si ha i permessi di comunicare)

attesa per tempo indefinito di un segnale:
	int pause();
Sospende il processo finchè non viene ricevuto un segnale.
NOTA: è bloccante!
Ritorna quando il processo riceve un segnale qualsiasi.
LIMITAZIONE: non può attendere un tipo di segnale preciso.
PROBLEMA PER LA SINCRONIZZAZIONE: il segnale atteso potrebbe essere consegnato prima della sua invocazione. -> necessario bloccare il segnale e usare sigsuspend

attesa per tempo definito di un segnale:
	unsigned int sleep( unsigned int seconds );
Ritorna "0" se allo scadere del tempo non ha ricevuto nessun segnale,
oppure il numero di secondi residui.

timer/generazione di un (singolo) SIGALRM allo scadere dell'intervallo specificato, da <unistd.h>:
	unsigned int alarm( unsigned int seconds );
OSS.: garantisce solo che la pausa sarà di _almeno_ il numero specificato di secondi
NOTA: in POSIX ci sono anche timers realtime -> vedi [...]

[...] -> vedi [c.txt] per le altre funzioni in <signal.h>


 ==== nuova interfaccia POSIX / affidabile ====
 
Oltre al signal handler vengono fornite al kernel indicazioni aggiuntive con la
	 struct sigaction { // (advanced) signal handler definition
		   union {
			   void (*sa_handler)(int);  // pointer to signal handler function or one of the macros SIG_IGN or SIG_DFL
			   void (*sa_sigaction)(int, siginfo_t *, void *); // (alternative to the above)
		   }
		   sigset_t   sa_mask;   // set of signals to be blocked during execution of the signal handling function -> include automaticamente il segnale corrente
		   int        sa_flags;  // special flags / signal options
	 };
"sa_flags" può essere:
	SA_RESTART = richiede il riavvio automatico delle syscalls interrotte -> utile!
	SA_NOCLDWAIT = if signum is SIGCHLD, do not transform children into zombies when they terminate
	SA_NOCLDSTOP = if signo is SIGCHLD, do not generate this signal when a child process stops (job control)
	SA_NODEFER = when this signal is caught, the signal is not automatically blocked by the system while the signal-catching function executes
	...

lettura/registrazione di un signal handler:
	int sigaction( int signo, const struct sigaction* act, struct sigaction* oact );
LETTURA: se "oact" non è NULL vi scrive le proprietà attuali.
MODIFICA: se "act" non è NULL.
 
gestione insiemi di segnali:
	int sigemptyset(sigset_t *set); // initialise and empty a signal set (all disabled)
	int sigfillset(sigset_t *set); // initialise and fill a signal set (all enabled)
	int sigaddset(sigset_t *set, int signo); // add a signal to a signal set
	int sigdelset(sigset_t *set, int signo); // delete a signal from a signal set 
	int sigismember(const sigset_t *set, int signo); // test for a signal in a signal set 
esempio d'uso:
	sigset_t s;
	sigemptyset(&s);
	sigaddset(&s, SIGUSR1);
	sigaddset(&s, SIGUSR2);
	if( sigismember(&s, SIGUSR1) ) puts("OK");

lettura/modifica signal mask:
	int sigprocmask( int how, const sigset_t* set, sigset_t* oset );
LETTURA: se "oset" non è NULL vi scrive la signal mask attuale del processo
MODIFICA: se "set" non è NULL modifica la signal mask del processo come indicato dal campo "how":
	SIG_BLOCK = The resulting set will be _the union_ of the current set and the signal set pointed to by set. -> più usato
	SIG_SETMASK = The resulting set will be the signal set pointed to by set. -> _sostituisce completamente la maschera!_
	SIG_UNBLOCK = The resulting set will be _the intersection_ of the current set and the complement of the signal set pointed to by set.
esempio d'uso:
	sigset_t    newmask, oldmask, waitmask;
	sigemptyset(&newmask);
	sigaddset(&newmask, SIGINT);
    // Block SIGINT and save current signal mask
		if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
		   puts("SIG_BLOCK error");
	// ...
    // Reset signal mask which unblocks SIGINT
		if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
			puts("SIG_SETMASK error");

lettura/modifica signal mask (macros):
	int sighold(int signo); // adds "sig" to the calling process' signal mask
	int sigignore(int signo); // sets the disposition of "sig" to "SIG_IGN"
	int sigpause(int signo);
	int sigrelse(int signo); // removes "sig" from the calling process' signal mask
NOTA: sono dei wrapper di "sigprocmask", fornite per comodità.

lettura blocked-pending signals:
	int sigpending( sigset_t* set );

RIVEDERE -> par.10.16
{
PROBLEMA: quando si "sblocca" un segnale pending e lo si vuole leggere subito, il segnale potrebbe andare perso.
-> E' necessaria una syscall atomica che faccia unlock+pause:
attesa di un segnale:
	int sigsuspend( const sigset_t* set );
The program is effectively suspended until one of the signals that is not a member of "set" arrives.
esempio d'uso:
	// blocca il segnale SIGQUIT
	....
	// sblocca e attende l'arrivo di SIGQUIT
	//:.
NOTA: può bloccarsi per tempo indefinito!
}


 ==== Process groups and sessions ====

Per distribuire/far ricevere lo stesso segnale ad un insieme di processi,
si può definire un *process group*.

Per la gestione dei *jobs in background/foreground*,
a loro volta i process groups possono essere raccolti in *process sessions*.
NOTA: *Job Control support is Optional*. You can use the _POSIX_JOB_CONTROL macro to test at compile-time whether the system supports job control.
_If job control is not supported, then there can be only one process group per session_, which behaves as if it were always in the foreground.
The functions for creating additional process groups simply fail with the error code ENOSYS.

LIMITAZIONE: Un processo può appartenere _ad un solo_ gruppo/sessione per volta.
Ogni process group/session ha un ID (PGID, SID), memorizzato sempre con il tipo "pid_t".
Ogni process group/session ha un *leader* = il primo processo che lo ha creato.
Se il leader termina prima degli altri processi, il process group diventa "orfano".

Di default PGID e SID sono uguali al PPID.
Quindi se si invia un "SIGKILL" ad un processo vengono uccisi con lui
tutti i suoi figli.
Per "svincolare" il destino di un figlio dal proprio padre
lo si deve inserire in un process group separato.

create/join a process group:
	int setpgid( pid_t pid, pid_t pgid );
puts the process "pid" into the process group "pgid".
Se "pgid == pid" oppure "pgid == 0",
crea un nuovo process group con il processo corrente come leader.

creazione di una nuova process session:
	pid_t setsid();
The calling process becomes the session leader,
and is put in a new process group whose PGID is the same as the PID of that process. 

lettura PGID/SID:
	pid_t getpgrp(); // returns the PGID of the calling process
	pid_t getsid(0); // returns the SID of the calling process


 === XSI IPC structures ===

The XSI IPC functions are based closely on the System V IPC functions.

XSI IPC structures:
 - message queues
 - semaphores
 - shared memory segments

ogni istanza è identificata da:
 - internal name / non-negative integer *identifier* -> allocato dal kernel
 - external name / *key* -> scelto dal programmatore -> puo' essere usato come rendezvous tra i processi

Un'istanza è costruita con le "xxxget" functions:
	int msgget(key_t, int);
	int semget(key_t, int, int);
	int shmget(key_t, size_t, int);
Ogni costruttore prende in input la key ed un integer flag.
Per creare una _nuova istanza_:
	xxxget( ...
Per aprire una _istanza esistente_:
	xxxget( ...

Per rimuovere un'istanza:
	ipcrm( ...

Ad ogni istanza e' associata una *permission structure*, definita in <sys/ipc.h>:
	struct ipc_perm {
		uid_t    uid;    // owner's user ID
		gid_t    gid;    // owner's group ID
		uid_t    cuid;   // creator's user ID
		gid_t    cgid;   // creator's group ID
		mode_t   mode;   // read/write permission
	};
che definisce l'owner e i permessi della stessa.

Svantaggi rispetto alle altre forme di IPC:
 - no file system mapping -> rendezvous più difficile
 - no rimozione automatica dal kernel -> kernel memory leaks
 - no file descriptors -> no multiplexed I/O possible con poll, select
A causa di queste limitazioni queste forme di IPCs non sono molto usate.

Lo standard POSIX.1b definisce anche una implementazione alternativa delle XSI IPC structures
nella sua realtime extension.


 ==== Message passing ====

 
 ==== Semafori ====
 
Sono disponibili 2 interfacce:
 - System V-style, in <sys/sem.h> http://www.cim.mcgill.ca/~franco/OpSys-304-427/lecture-notes/node31.html
 - POSIX-style, in <semaphore.h> (da POSIX.1b)

[TODO: vedere SOLO la seconda!]


 ==== Shared memory ====

Sono disponibili 2 interfacce:
 - System V-style, in <sys/shm.h> (da UNIX95) -> utilizza uno "shmid"
 - BSD-style, in <sys/mman.h> -> si appoggia ad un file (memory mapped I/O)
 
caratteristiche comuni:
 - Uniquely naming the segment
 - Specifying access permissions
 - Race conditions handling

SysV:
Due o più processi condividono un segmento di memoria per comunicare.
Il segmento può essere "attaccato" e "staccato" più volte dal processo.
Il processo che lo crea deve autorizzare gli altri ad accedervi.
[...]
http://www.cs.cf.ac.uk/Dave/C/node27.html
http://fscked.org/writings/SHM/shm.html#toc2

NOTA: se più processi accedono contemporaneamente _in scrittura_ alla stessa
area di memoria può essere necessario l'uso dei semafori.

 
 === Sockets ===

Lo standard POSIX si basa sui
 *BSD / Berkeley sockets* (d'ora in poi semplicemente "sockets"),
 definiti in <sys/socket.h>.
In alternativa, è disponibile anche la *TLI = Transport Layer Interface*,
 proveniente da UNIX System V e basata sugli *STREAMS*, definita in <stropts.h>.

A differenza dei metodi precedenti,
 i sockets consentono la comunicazione anche tra *processi remoti* = su 2 OS differenti.

vedi [socket.txt]


 == Sincronizzazione ==
 
Una volta generato un figlio, il padre può:
 - attendere la sua terminazione (con "wait")
 - proseguire la sua esecuzione (può ricevere notifica della sua terminazione settando un signal handler per SIGCHLD)
Nel secondo caso dovrà curarsi di non interferire con il figlio usando le risorse condivise -> race conditions
SOLUZIONI:
 - chiusura dei file descriptors non utilizzati
 - definizione di *sezioni critiche* e uso di IPC per la sincronizzazione:
    - 2 pipes -> ok se i processi sono imparentati
    - segnali (con "sigsuspend" + SIGUSR1/2) -> ok se i processi sono imparentati
    - XSI IPC structures, semafori -> non molto usate
    - POSIX realtime IPC structures, semafori

protocollo generale:
	TELL_WAIT();    /* set things up for TELL_xxx & WAIT_xxx */
	if ((pid = fork()) < 0) {
		err_sys("fork error");
	} else if (pid == 0) {            /* child */
		/* child does whatever is necessary ... */
		TELL_PARENT(getppid());     /* tell parent we're done */
		WAIT_PARENT();              /* and wait for parent */
		/* and the child continues on its way ... */
		exit(0);
	}
	/* parent does whatever is necessary ... */
	TELL_CHILD(pid);            /* tell child we're done */
	WAIT_CHILD();               /* and wait for child */
	/* and the parent continues on its way ... */
	exit(0);

esempio - sincronizzazione con pipes:
	static int  pfd1[2], pfd2[2];
	void TELL_WAIT(void)
	{
		if (pipe(pfd1) < 0 || pipe(pfd2) < 0)
			err_sys("pipe error");
	}
	void TELL_PARENT(pid_t pid)
	{
		if (write(pfd2[1], "c", 1) != 1)
			err_sys("write error");
	}
	void WAIT_PARENT(void)
	{
		char    c;

		if (read(pfd1[0], &c, 1) != 1)
			err_sys("read error");

		if (c != 'p')
			err_quit("WAIT_PARENT: incorrect data");
	}
	void TELL_CHILD(pid_t pid)
	{
		if (write(pfd1[1], "p", 1) != 1)
			err_sys("write error");
	}
	void WAIT_CHILD(void)
	{
		char    c;

		if (read(pfd2[0], &c, 1) != 1)
			err_sys("read error");

		if (c != 'c')
			err_quit("WAIT_CHILD: incorrect data");
	}

esempio - sincronizzazione con segnali:
	volatile sig_atomic_t sigflag; /* set nonzero by sig handler */
	sigset_t newmask, oldmask, zeromask;

	static void sig_usr(int signo)   /* one signal handler for SIGUSR1 and SIGUSR2 */
	{
		sigflag = 1;
	}

	void TELL_WAIT(void)
	{
		if (signal(SIGUSR1, sig_usr) == SIG_ERR)
			err_sys("signal(SIGUSR1) error");
		if (signal(SIGUSR2, sig_usr) == SIG_ERR)
			err_sys("signal(SIGUSR2) error");
		sigemptyset(&zeromask);
		sigemptyset(&newmask);
		sigaddset(&newmask, SIGUSR1);
		sigaddset(&newmask, SIGUSR2);

		/*
		 * Block SIGUSR1 and SIGUSR2, and save current signal mask.
		 */
		if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
			err_sys("SIG_BLOCK error");
	}

	void TELL_PARENT(pid_t pid)
	{
		kill(pid, SIGUSR2);              /* tell parent we're done */
	}

	void WAIT_PARENT(void)
	{
		while (sigflag == 0)
			sigsuspend(&zeromask);   /* and wait for parent */
		sigflag = 0;

		/*
		 * Reset signal mask to original value.
		 */
		if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
			err_sys("SIG_SETMASK error");
	}

	void TELL_CHILD(pid_t pid)
	{
		kill(pid, SIGUSR1);             /* tell child we're done */
	}

	void WAIT_CHILD(void)
	{
		while (sigflag == 0)
			sigsuspend(&zeromask);  /* and wait for child */
		sigflag = 0;

		/*
		 * Reset signal mask to original value.
		 */
		if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
			err_sys("SIG_SETMASK error");
	}

esempio - sincronizzazione con POSIX semaphores:
	...


 == Threads == 

multithreading - vantaggi:
 - gestione semplificata eventi asincroni
 - condivisione memoria e file descriptors automatica
 - esecuzione in parallelo di più operazioni semplificata (serializing tasks)
 - miglior response-time per programmi interattivi -> separazione UI/core
 
multithreading - svantaggi:
 - race conditions -> necessità sincronizzazione -> possibilità deterioramento prestazioni
 
Il multithreading si presta bene nel caso in cui un programma debba svolgere più task contemporaneamente tra loro indipendenti.
Se i task non sono indipendenti ma condividono delle risorse (locazioni di memoria, files, etc.) è necessario sincronizzarli.
La sincronizzazione complica la logica del programma e potrebbe causare un deterioramento delle prestazioni.
Quindi se i dati condivisi sono molti il multithreading potrebbe non essere la scelta migliore.

i threads di un processo condividono:
 - funzioni e variabili globali
 - memoria dinamica
 - file descriptors
 - environment, working directory
 - UID/GID

i threads di un processo NON condividono:
 - TID = Thread ID -> univoco solo all'interno di un processo
 - stato registri CPU
 - stack
 - pending signals, signal mask -> cleared on thread creation
 - errno
 - thread-specific data (vedi sotto)

L'implementazione POSIX dei thread è opzionale ed è fornita in:
	#if _POSIX_THREADS
		#include <pthread.h>
	#endif

"pthread_t" è il tipo (opaco) per memorizzare il TID.

NOTA: there is no portable way to print the thread ID

lettura TID thread corrente:
	pthread_t pthread_self(void);

confronto TIDs:
	int pthread_equal(pthread_t tid1, pthread_t tid2);
Returns: nonzero if equal, 0 otherwise
 
creazione nuovo thread:
	int pthread_create( pthread_t* tidp, const pthread_attr_t* attr, void *(*start_rtn)(void *), void* arg );
Returns: 0 if OK, error number on failure.
"tidp" se la chiamata ha successo conterrà il TID del nuovo thread
"attr" se non è NULL può contenere opzioni di creazione:
	...
"start_rtn" è la "main" function del thread
"arg" se non è NULL contiene gli argomenti da passare alla main function
NOTA: poiché non si sa quando partirà il thread, accertarsi che "arg" resti allocato anche dopo l'invocazione della create.
NOTA2: la variabile di ritorno "void*" può essere utilizzata per ritornare qualunque tipo di dato. Tuttavia, affinché essa sia ancora disponibile dopo la terminazione del thread, DEVE essere allocata all'esterno del thread, NON all'interno!
esempio d'uso:
	void* thr_fn( void* arg ); // prototipo main function thread
	pthread_t ntid;
	pthread_create( &ntid, NULL, thr_fn, NULL );

terminazione thread:
	return((void *)0); // invocato nella main function
	void pthread_exit(void *rval_ptr); // invocato all'interno del thread (qualsiasi funzione)
	int pthread_cancel(pthread_t tid); // invocato da un altro thread

registrazione/cancellazione exit handlers / thread cleanup handlers (~atexit):
	void pthread_cleanup_push(void (*rtn)(void *), void *arg);
	void pthread_cleanup_pop(int execute);

attesa (sincrona) terminazione thread (~waitpid):
	int pthread_join(pthread_t thread, void **rval_ptr);
"rval_ptr" se non è NULL conterrà il valore di ritorno, oppure PTHREAD_CANCELED.


 === Reentrancy ===
 
NOTA: reentrant => thread-safe, MA NON VICEVERSA
      reentrant !=> signal-safe

Le funzioni invocate dai threads DEVONO essere reentrant. -> usare solo syscalls reentrant e thread-safe
NON usare: strtok, strerror, readdir, rand, putenv, gethostbyaddr, gethostbyname, ...
piuttosto, controllare se è definita "_POSIX_THREAD_SAFE_FUNCTIONS"
 ed usare le alternative con il suffisso "_r":
	strtok_r, strerror_r, readdir_r, rand_r, ...
REGOLA D'ORO: Assume that a call is not thread-safe unless the manpage tells you otherwise.
NOTA: The POSIX threads specification does demand
      that the calls in the C library (including malloc() and
      printf()) be thread-safe.
Contrariamente a quanto si potrebbe pensare la malloc è thread-safe.
"Threads usually use malloc to allocate memory for their thread-specific data.
The destructor function usually frees the memory that was allocated.
If the thread exited without freeing the memory, then the memory would be lost: leaked by the process."


 === Condivisione ===

La condivisione dei dati globali del programma è effettuata automaticamente,
tuttavia occorre prendere alcuni accorgimenti:
 - qualificare le variabili singole con "volatile"
 - sincronizzarne l'accesso. possibilità:
    - mutua esclusione = un solo thread alla volta -> uso di mutexes
    - read/write-only = ammette più processi in lettura o uno solo in scrittura -> uso di ReaderWriter Locks

Si possono verificare problemi di concorrenza (race conditions)
se un thread legge ed un altro scrive la stessa "risorsa" (locazione di memoria, file, etc),
oppure se più threads scrivono.

Tipi di lock:
- 2-state lock = lock/unlock -> mutex (exclusive lock)
- 3-state lock:
   - locked in read mode (shared lock)
   - locked in write mode (exclusive lock)
   - unlocked

  meccanismi:
 - *mutexes* ~= binary locks advisory
 - *ReaderWriter / shared-exclusive Locks* ~= 3-state r/wlocks

NOTA: i lock sono sempre *advisory*
NOTA2: l'associazione lock <-> dato protetto è sempre implicita.
E' compito del programmatore assicurarsi che un thread non acceda alle risorse condivise senza prima aver acquisito tutti i lock necessari.
NOTA3: con gli attributi di default, lockare 2 volte lo stesso mutex causa un deadlock.
Un deadlock può anche verificarsi se si richiedono più lock nell'ordine scorretto. -> vedi "" in db.txt

Per sincronizzare l'accesso ai "FILE" sono disponibili delle funzioni di locking specifiche:
	void flockfile(FILE *fp);
	int ftrylockfile(FILE *fp);
Returns: 0 if OK, nonzero if lock can't be acquired
NOTA: la prima è bloccante, la seconda no.
NOTA2: This lock is recursive: you can acquire it again, while you already hold it, without deadlocking
	void funlockfile(FILE *fp);
NOTA IMPORTANTE: questo tipo di lock è solo application-level, e si basa sull'acquisizione di un mutex contenuto nella struct FILE passata come argomento.
Quindi, _non comporta l'acquisizione di un lock a livello di processo_, e
affinchè sia efficace è necessario che i threads accedano al file condiviso con la _stessa_ istanza di FILE.
In realtà, tutte le funzioni di I/O della standard library acquisiscono automaticamente questo mutex.
Queste primitive servono per serializzare _più_ operazioni di I/O, in modo che non vengano interfogliate dal thread scheduler.


 ==== Mutexes ====

Rappresentati con il tipo "pthread_mutex_t" (opaco).

OSS.: i mutex possono essere inclusi direttamente all'interno della struct che proteggono o all'esterno.

costruttore/distruttore:
	int pthread_mutex_init( pthread_mutex_t* mutex, const pthread_mutexattr_t* attr );
	int pthread_mutex_destroy( pthread_mutex_t* mutex );
esempio d'uso:
	// creazione con attributi di sicurezza predefiniti
	// static allocation
		pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
	// dynamic allocation
		pthread_mutex_t* m = malloc( sizeof(pthread_mutex_t) );
		pthread_mutex_init( m, NULL );
		//...
		pthread_mutex_destroy( m );

locking:
	int pthread_mutex_lock( pthread_mutex_t *mutex );
	int pthread_mutex_trylock(pthread_mutex_t *mutex);
La prima è bloccante,
la seconda fallisce e ritorna EBUSY invece di bloccarsi.

PROBLEMA: A thread will deadlock itself if it tries to lock the same mutex twice.
SOL.: definizione attributi di sicurezza
	
creazione con attributi di sicurezza:
	int pthread_mutex_init( pthread_mutex_t *mutex, const pthread_mutexattr_t* attr );
"attr" è una struct settata con:
	int pthread_mutexattr_settype(pthread_mutexattr_t* attr, int type);
"type" può essere:
	PTHREAD_MUTEX_ERRORCHECK -> la "lock" ritorna errore se si tenta di bloccare 2 volte lo stesso mutex
	PTHREAD_MUTEX_RECURSIVE -> consente più "lock" sullo stesso mutex
	...

esempio d'uso:
	// creazione di un mutex con controllo deadlock
		pthread_mutexattr_t attr;
		pthread_mutexattr_init(&attr);
		pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); // abilita error-checking per prevenire deadlocks
	pthread_mutex_t* m = malloc( sizeof(pthread_mutex_t) );
	pthread_mutex_init( m, &attr );
		pthread_mutexattr_destroy(&attr);


 === Thread-specific data ===

In alternativa alla condivisione si può scegliere di mantenere "private" delle aree di memoria.
Le variabili allocate nello stack sono private, mentre quelle globali, statiche e nello heap sono condivise.
Per evitare questa "condivisione automatica", al loro posto si può utilizzare thread-specific data.
Ciò può essere utile per utilizzare in multithreading codice inizialmente scritto per girare in 1 solo thread.
(Ad es. per rendere le funzioni che fanno uso di memoria statica thread-safe)
...


 === Sincronizzazione ===
 
Tipi di sincronizzazione tra threads:
- "esterna" = aspetto che un thread termini del tutto prima di continuare -> join
- "interna" = aspetto che un thread raggiunga un certo "punto" prima di continuare -> mutexes, variabili di condizione (signal+wait)

Per evitare di fare busy waiting sulle variabili condivise si possono usare le *variabili di condizione*.
Le variabili di condizione consentono la sincronizzazione


= Appendice: funzioni non standard =

Sebbene non appartengano allo standard POSIX, queste funzioni sono cmq
 disponibili in molte implementazioni:
	<sys/queue.h> // BSD implementations of lists, tail queues, and circular queues
	inet_aton // from BSD sockets
	<ifaddrs.h> // BSD, get interface addresses 
	da <netdb.h>:
		getrpcent, getrpcbyname, getrpcbynumber, setrpcent, endrpcent // RPC handling
		rcmd, rresvport, iruserok, ruserok // routines for returning a stream to a remote command 
		rexec // return stream to a remote command 
		setnetgrent, endnetgrent, getnetgrent, getnetgrent_r, innetgr // handle network group entries
	<sys/file.h> // BSD "flock" 
	<err.h> // BSD error handling functions "err*" and "warn*" -> facilmente rimpiazzabili con funzioni standard
	<mp.h> // BSD MultiplePrecision types -> see also <gmp.h> http://gmplib.org/manual/BSD-Compatible-Functions.html
	<sysexits.h> // BSD preferable exit codes for programs
	<sys/cdefs.h> // BSD alternative for <sys/types.h>
	<net/if_dl.h> // BSD
	<net/route.h> // BSD
	<protocols/rwhod.h> // BSD
	<sys/sendfile.h> // transfer data between file descriptors
	<sys/vfs.h>, <sys/statfs.h> // BSD file system statistics
	<shadow.h> // General shadow password file API
	makedev, major, minor // device number management
	getdtablesize, getpagesize, mincore, ...
	<sys/time.h> // BSD timeval operations: timeradd, timersub, timercmp, timerclear, timerisset, settimeofday
	daemon // BSD daemonize command
	alloca // on-request stack memory allocation (*alloc-like)
	getloadavg // get system load averages
	<fstab.h> // handle fstab entries
	<rpc/des_crypt.h> // fast DES encryption
	<monetary.h> // convert monetary value to a string
	<mpool.h>, <db.h> // BSD shared memory buffer pool
	<utmp.h> // login records
	<ttyent.h> // get ttys file entry
	<sys/md4.h>
	<sys/md5.h> // MD5Data, MD5End, MD5File, MD5FileChunk, MD5Final, MD5Init, MD5Pad, MD5Transform, MD5Update
	arc4random, arc4random_addrandom, arc4random_buf, arc4random_stir, arc4random_uniform // Arc4 random number generator
	bsd_getopt
	humanize_number, dehumanize_number // format a number into a human readable form and viceversa
	fgetln // get a line from a stream -> usare "getline"
	fparseln  // return the next logical line from a stream
	flopen // Reliably open and lock a file
	fmtcheck // sanitizes user-supplied printf-style format string
	fpurge // purge a stream (discards any unwritten output)
	getmode, setmode //  modify mode bits
	heapsort, mergesort, radixsort
	inet_net_pton
	<nlist.h> // get entries from symbol table
	pidfile_open, pidfile_write, pidfile_close, pidfile_remove
	readpassphrase // get a passphrase from the user
	getdtablesize // get descriptor table size
	strlcpy, strlcat, strmode, strtonum
	setproctitle
	getprogname, setprogname
	closefrom // closes all open file descriptors greater than or equal to
	<vis.h> // strnvis, strnunvis, strunvis, strunvisx, strvis, strvisx, unvis, vis
	...

TIP: gcc fornisce l'opzione "-lbsd-compat" per migliorare la compatibilità con BSD.


 = Fonti =

http://en.wikipedia.org/wiki/C_POSIX_library
reference of the POSIX functions which are not part of the Standard C Library http://cplus.kompf.de/posixlist.html
Advanced Linux Programming http://www.advancedlinuxprogramming.com
GaPiL -- Guida alla Programmazione in Linux http://gapil.truelite.it/