//-------------------------------------------------------------------
//  grabfile.cpp
//
//  This is our initial prototype for an HTTP client which lets
//  a user obtain a file-object from an HTTP server by entering
//  that object's URL as a string-argument on the command-line.
//
//      to compile:  $ g++ grabfile.cpp -o grabfile
//      to execute:  $ ./grabfile <url>
//
//  NOTE: A test-case:  $ ./grabfile www.cs.usfca.edu/index.html
//
//  programmer: ALLAN CRUSE
//  written on: 17 MAR 2009
//-------------------------------------------------------------------

#include <netdb.h>          // for getaddrinfo(), gai_strerror()
#include <stdio.h>          // for printf(), perror()
#include <fcntl.h>          // for open()
#include <stdlib.h>         // for exit(), malloc(), strtol()
#include <unistd.h>         // for read(), write(), close()
#include <string.h>         // for strstr(), strcspn(), memcpy()
#include <sys/socket.h>     // socket(), connect()
#include <sys/types.h>      // for ssize_t
#include <netinet/in.h>     // for IPPROTO_TCP
#include <errno.h>          // for errno, EINTR

// longest command-line URL we accept (including the terminating NUL)
enum { URL_MAX = 128 };

// Copies the first 'n' characters of 'src' into 'dst', truncating to
// 'dstsize - 1' characters if necessary, and always NUL-terminates.
static void copy_field( char *dst, size_t dstsize, const char *src, size_t n )
{
    if ( n >= dstsize ) n = dstsize - 1;
    memcpy( dst, src, n );
    dst[ n ] = '\0';
}

// Resolves 'host' and the numeric 'port' string, then returns a TCP
// socket connected to the first address that accepts the connection.
// Prints a diagnostic and exits if resolution or every connect fails.
static int connect_to_host( const char *host, const char *port )
{
    struct addrinfo hints;
    memset( &hints, 0, sizeof( hints ) );
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;

    struct addrinfo *list = NULL;
    int rc = getaddrinfo( host, port, &hints, &list );
    if ( rc != 0 ) {
        fprintf( stderr, "getaddrinfo: %s\n", gai_strerror( rc ) );
        exit(1);
    }

    int sock = -1;
    int saved_errno = 0;
    const char *stage = "socket";
    for ( struct addrinfo *ai = list; ai != NULL; ai = ai->ai_next ) {
        sock = socket( ai->ai_family, ai->ai_socktype, ai->ai_protocol );
        if ( sock < 0 ) {
            saved_errno = errno;
            stage = "socket";
            continue;
        }
        if ( connect( sock, ai->ai_addr, ai->ai_addrlen ) == 0 ) break;
        // remember why this address failed; close() may overwrite errno
        saved_errno = errno;
        stage = "connect";
        close( sock );
        sock = -1;
    }
    freeaddrinfo( list );

    if ( sock < 0 ) {
        errno = saved_errno;
        perror( stage );
        exit(1);
    }
    return sock;
}

// Writes all 'count' bytes of 'buf' to 'fd', retrying after partial
// writes and EINTR.  Returns 'count' on success, or -1 with errno set.
static ssize_t write_all( int fd, const char *buf, size_t count )
{
    size_t done = 0;
    while ( done < count ) {
        ssize_t n = write( fd, buf + done, count - done );
        if ( n < 0 ) {
            if ( errno == EINTR ) continue;
            return -1;
        }
        done += (size_t)n;
    }
    return (ssize_t)done;
}

// Reads from 'fd' until end-of-file into a heap buffer that grows as
// needed.  On success returns the buffer (caller must free() it) and
// stores the byte-count in '*total'; on failure returns NULL with
// errno describing the failed read() or allocation.
static char *read_all( int fd, size_t *total )
{
    size_t cap = BUFSIZ;
    size_t used = 0;
    char *buf = (char *)malloc( cap );
    if ( !buf ) return NULL;

    for (;;) {
        if ( used == cap ) {
            size_t newcap = cap * 2;
            char *tmp = (char *)realloc( buf, newcap );
            if ( !tmp ) {
                int saved_errno = errno;
                free( buf );
                errno = saved_errno;
                return NULL;
            }
            buf = tmp;
            cap = newcap;
        }
        ssize_t n = read( fd, buf + used, cap - used );
        if ( n < 0 ) {
            if ( errno == EINTR ) continue;
            int saved_errno = errno;
            free( buf );
            errno = saved_errno;
            return NULL;
        }
        if ( n == 0 ) break;    // server closed the connection
        used += (size_t)n;
    }
    *total = used;
    return buf;
}

// Entry point: parses the URL given as argv[1] into host, port and
// path (keeping defaults for any missing component), connects to the
// server, sends an HTTP/1.0 GET request, and displays the complete
// response.  Exits with status 1 on any error.
int main( int argc, char **argv )
{
    //---------------------------------------------
    // setup default-values for the URL components
    //---------------------------------------------
    char proto[ 5 ] = "http";
    char host[ 64 ] = "localhost";
    char port[ 8 ] = "80";
    char path[ 80 ] = "./index.html";

    //-----------------------------------------
    // validate the lone command-line argument
    //-----------------------------------------
    if ( argc < 2 ) {
        fprintf( stderr, " missing URL \n" );
        exit(1);
    }
    const char *url = argv[ 1 ];
    if ( strlen( url ) >= URL_MAX ) {
        fprintf( stderr, " argument-string too long \n");
        exit(1);
    }

    //-------------------------------------------
    // strip away the 'method' field, if present
    //-------------------------------------------
    // A "://" only counts as a scheme separator when it contains the
    // very first '/' of the string; otherwise it belongs to the path
    // (for example a URL embedded in a query-string).
    const char *sep = strstr( url, "://" );
    if ( sep && strcspn( url, "/" ) == (size_t)( sep - url ) + 1 ) {
        size_t schemelen = (size_t)( sep - url );
        if ( schemelen != strlen( proto )
             || strncmp( url, proto, schemelen ) != 0 ) {
            fprintf( stderr, " protocol is not \'http\' \n" );
            exit(1);
        }
        url = sep + 3;
    }

    //--------------------------------------------------
    // first let's separate away the 'path', if present
    //--------------------------------------------------
    // everything before the first '/' is the host[:port] authority;
    // whatever follows that '/' replaces the default after "./"
    size_t authlen = strcspn( url, "/" );
    const char *rest = url + authlen;
    if ( *rest == '/' ) ++rest;
    if ( *rest != '\0' )
        copy_field( path + 2, sizeof( path ) - 2, rest, strlen( rest ) );

    //-------------------------------------------------------
    // next let's separate the 'host' and 'port', if present
    //-------------------------------------------------------
    // Degenerate cases keep the defaults: no hostname before the
    // first colon, or no port-number after the run of (possibly
    // several) colons.  Anything after a later colon is ignored.
    const char *authend = url + authlen;
    const char *colon = (const char *)memchr( url, ':', authlen );
    size_t hostlen = colon ? (size_t)( colon - url ) : authlen;
    if ( hostlen ) copy_field( host, sizeof( host ), url, hostlen );
    if ( colon ) {
        const char *pbeg = colon;
        while ( pbeg < authend && *pbeg == ':' ) ++pbeg;
        const char *pend = pbeg;
        while ( pend < authend && *pend != ':' ) ++pend;
        if ( pend > pbeg )
            copy_field( port, sizeof( port ), pbeg, (size_t)( pend - pbeg ) );
    }

    //------------------------------------------------------
    // show the user how we have interpreted the URL-string
    //------------------------------------------------------
    printf( "\n Uniform Resource Locator is: " );
    printf( "<%s://%s:%s/%s> \n", proto, host, port, path );

    //---------------------------------------------------
    // we convert the port-number from ascii to a number
    //---------------------------------------------------
    errno = 0;
    char *endp = NULL;
    long portval = strtol( port, &endp, 10 );
    if ( errno != 0 || endp == port || *endp != '\0'
         || portval < 1 || portval > 65535 ) {
        fprintf( stderr, " invalid port-number \'%s\' \n", port );
        exit(1);
    }
    int portno = (int)portval;

    //-----------------------------------------------------------
    // we resolve the server's name and establish the connection
    //-----------------------------------------------------------
    int sock = connect_to_host( host, port );
    printf( "\n connected to \'%s\' on port %d \n", host, portno );

    //------------------------------------
    // we create the HTTP Request-message
    //------------------------------------
    // 'path' always begins with "./", so 'path + 1' is the absolute
    // path (leading '/') that an HTTP Request-Line requires.
    char request[ BUFSIZ ];
    int reqlen = snprintf( request, sizeof( request ),
                           "GET %s HTTP/1.0\r\n\r\n", path + 1 );
    if ( reqlen < 0 || (size_t)reqlen >= sizeof( request ) ) {
        fprintf( stderr, " request-message too long \n" );
        exit(1);
    }

    //-------------------------------------------------
    // we write the HTTP Request-message to the stream
    //-------------------------------------------------
    ssize_t txbytes = write_all( sock, request, (size_t)reqlen );
    if ( txbytes < 0 ) { perror( "write" ); exit(1); }
    printf( "\n sent %zd bytes to server \n", txbytes );

    //---------------------------------------------------
    // we read the HTTP Response-message from the stream
    //---------------------------------------------------
    // An HTTP/1.0 server closes the connection after its reply, so
    // we read until end-of-file; no artificial delay is needed and
    // responses larger than one buffer are no longer truncated.
    size_t rxbytes = 0;
    char *response = read_all( sock, &rxbytes );
    if ( !response ) { perror( "read" ); exit(1); }
    printf( " received %zu bytes from the server \n", rxbytes );

    //--------------------------------------
    // we display the HTTP Response-message
    //--------------------------------------
    printf( "\n" );
    printf( "\033[37;40m" );
    fwrite( response, 1, rxbytes, stdout );     // may contain NUL bytes
    printf( "\033[0m" );
    printf( "\n" );

    //-----------------------------------------
    // we close our connection with the server
    //-----------------------------------------
    free( response );
    close( sock );
    return 0;
}