#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <assert.h>

/* 

Once upon a time, a Compact Flash card containing a bunch of pictures 
(jpeg images) was accidentally formatted. I wanted the pictures back, 
so I rescued them.

This is a small program that I wrote to do this. It searches a 
filesystem for what appear to be JPEG files, and then it extracts these
JPEG files.

The workings of JpegRescue are described in this journal entry:

  http://www.livejournal.com/users/nibot_lab/12304.html

This program is pretty bare-bones at the moment -- you'll have to follow
the directions in the journal entry (linked above) and modify the program
appropriately.

  Tobin Fricke                      
  
  Berkeley, California - April 2004 

  http://splorg.org/people/tobin/projects/jpegrescue   <tobin@splorg.org>

*/


/* Byte signature that marks a candidate JPEG file; main() passes it to
   search() when scanning the source file. */
char *pattern = "Exif";
/* Number of bytes of `pattern` that search() is asked to match. */
int   pattern_len = 4;
/* Distance in bytes from the offset returned by search() (the position just
   past the matched pattern) back to the point where main() starts parsing
   JPEG markers. */
int   pattern_ofs = 10;

/*
   Copy `length` bytes starting at byte `offset` of the descriptor `src`
   into a new file named "<serial>.jpg" in the current directory, where
   <serial> is a counter that starts at 0 and increases by one for every
   file written.

   The read position of `src` is saved before the copy and restored
   afterwards, so the caller's scan is not disturbed.

   An existing output file is truncated, so data left behind by an earlier
   run can never trail the new image.  The data is copied through a small
   fixed-size buffer; if the source ends early only the bytes actually
   available are written.  Failure to create the output file terminates the
   program with a failure status; other I/O errors are reported on stderr
   and abandon the current copy.
*/
void extract(int src, int offset, int length) {
  static int serial = 0;
  char filename[32];   /* large enough for any int plus ".jpg" */
  char chunk[8192];
  int remaining = length;

  if (length <= 0) {
    fprintf(stderr, " nothing to extract at offset 0x%x (length %d)\n",
            (unsigned int)offset, length);
    return;
  }

  snprintf(filename, sizeof filename, "%d.jpg", serial++);
  printf("\n extracting from offset 0x%x to \"%s\"\n",
         (unsigned int)offset, filename);

  int dst = open(filename, O_WRONLY | O_NOCTTY | O_CREAT | O_TRUNC,
                 S_IRUSR | S_IWUSR);
  if (dst == -1) { perror("open failed"); exit(EXIT_FAILURE); }

  /* Remember where the caller was reading so we can put it back. */
  off_t old_spot = lseek(src, 0, SEEK_CUR);

  if (lseek(src, offset, SEEK_SET) == (off_t)-1) {
    perror("lseek failed");
    goto done;
  }

  while (remaining > 0) {
    size_t want = remaining < (int)sizeof chunk ? (size_t)remaining
                                                : sizeof chunk;
    ssize_t got = read(src, chunk, want);

    if (got < 0) { perror("read failed"); goto done; }
    if (got == 0) {
      fprintf(stderr, " source ended %d bytes early while writing \"%s\"\n",
              remaining, filename);
      goto done;
    }

    /* write() may accept fewer bytes than requested; keep going. */
    ssize_t written = 0;
    while (written < got) {
      ssize_t put = write(dst, chunk + written, (size_t)(got - written));
      if (put < 0) { perror("write failed"); goto done; }
      written += put;
    }

    remaining -= (int)got;
  }

done:
  if (close(dst) == -1) perror("close failed");
  if (old_spot != (off_t)-1) lseek(src, old_spot, SEEK_SET);
}

/* 
   JPEG files are stored using big-endian byte order (eg, "the way God
   intended"), so we have to swap the high and low bytes of a word if we're
   working on an Intel machine.
*/

/* Exchange the low byte and the high byte of the 16-bit value that `word`
   points to, storing the result back in place. */
void wordswap(unsigned short int *word) {
  unsigned short int value = *word;
  unsigned short int upper = value >> 8;
  unsigned short int lower = value & 0xFF;

  *word = upper + (lower << 8);
}

/* 
   These are the offsets at which I found the string "Exif" in the filesystem.
   The actual JPEG files begin 6 bytes before these offsets.   I used the
   unix command "strings -t x" piped through "grep Exif" to produce these 
   numbers.
*/

/*
   Look for the first `n` bytes of `string` in the file open on `fd`.

   fd              - descriptor to read from; its file position is moved
   string          - bytes to look for
   n               - number of bytes of `string` that must match
   offset          - byte offset at which the first attempt starts
   actually_search - non-zero: on a mismatch retry one byte further on,
                     until a match is found or the file is exhausted;
                     zero: only test for a match exactly at `offset`
   file_len        - attempts never start at or beyond this offset

   Returns the file offset just past the matched bytes, or -1 if there is
   no match (including end of file or a read/seek error).

   Every candidate start position is tried in turn: after a partial match
   fails, the file is rewound to one byte past the start of that attempt,
   so no occurrence is skipped.  Bytes are still read one at a time, which
   is simple but slow on large inputs.
*/
int search(int fd, char *string, int n, int offset, int actually_search, int file_len) {
  off_t start = offset;   /* where the current attempt began */
  int matched = 0;        /* bytes of `string` matched so far */

  if (lseek(fd, start, SEEK_SET) == (off_t)-1) return -1;

  while (matched < n) {
    char c;

    /* End of file or a read error: there is nothing left to match. */
    if (read(fd, &c, 1) != 1) return -1;

    if (c == string[matched]) {
      matched++;
      continue;
    }

    if (!actually_search) return -1;

    start++;
    if (start >= file_len) return -1;

    /* After a partial match we have read past start; go back.  With no
       partial match the file position is already at the new start. */
    if (matched > 0 && lseek(fd, start, SEEK_SET) == (off_t)-1) return -1;
    matched = 0;
  }

  return (int)lseek(fd, 0, SEEK_CUR);
}

/* Read one byte from fd into *out.  Returns 1 on success, 0 on end of file
   or error. */
static int read_byte(int fd, unsigned char *out) {
  return read(fd, out, 1) == 1;
}

/* Read a 16-bit big-endian value from fd into *out, assembling it from the
   individual bytes so the result does not depend on host byte order.
   Returns 1 on success, 0 on end of file or error. */
static int read_be16(int fd, unsigned int *out) {
  unsigned char raw[2];

  if (read(fd, raw, sizeof raw) != (ssize_t)sizeof raw) return 0;
  *out = ((unsigned int)raw[0] << 8) | raw[1];
  return 1;
}

/* Scan forward from the current position of fd for the byte pair FF D9,
   at any alignment.  On success stores the offset just past the pair in
   *end and returns 1; returns 0 if the input ends (or fails) first. */
static int find_end_of_image(int fd, off_t *end) {
  unsigned char chunk[4096];
  off_t base = lseek(fd, 0, SEEK_CUR);   /* file offset of chunk[0] */
  int prev_was_ff = 0;                   /* carries across chunk borders */
  ssize_t got;

  if (base == (off_t)-1) return 0;

  while ((got = read(fd, chunk, sizeof chunk)) > 0) {
    for (ssize_t k = 0; k < got; k++) {
      if (prev_was_ff && chunk[k] == 0xD9) {
        *end = base + k + 1;
        return 1;
      }
      prev_was_ff = (chunk[k] == 0xFF);
    }
    base += got;
  }
  return 0;
}

/*
   Usage: <program> filename

   Scans `filename` for occurrences of `pattern`.  For each one, parsing
   starts `pattern_ofs` bytes before the end of the match and walks the
   marker segments (FF, marker byte, 16-bit big-endian length) until the
   start-of-scan marker (DA); from there the data is scanned for the
   end-of-image marker (FF D9) and everything from the start of parsing
   through that marker is handed to extract().

   Returns 0 after a completed scan; exits with a non-zero status on a
   usage error, if the file cannot be opened, or if it is too large for the
   int offsets used by search() and extract().
*/
int main(int argc, char **argv) {
  int success = 0, failure = 0;

  if (argc != 2) {
    printf("Jpeg Rescue -- please see http://splorg.org/people/tobin/projects/jpegrescue/ for documentation\n");
    printf("Usage: %s filename\n",argv[0]);
    exit(1);
  }

  int fd = open(argv[1], O_RDONLY | O_NOCTTY);
  if (fd == -1) {
    perror("open failed");
    exit(EXIT_FAILURE);
  }

  off_t size = lseek(fd, 0, SEEK_END);
  if (size == (off_t)-1) {
    perror("lseek failed");
    exit(EXIT_FAILURE);
  }

  /* search() and extract() take int offsets; refuse inputs that would
     overflow them rather than silently scanning garbage offsets. */
  int file_len = (int)size;
  if (file_len < 0 || (off_t)file_len != size) {
    fprintf(stderr, "%s is too large to be scanned by this program\n", argv[1]);
    exit(EXIT_FAILURE);
  }

  printf("The source file contains %d bytes (%d MB)\n",
         file_len, file_len / (1024*1024));

  int offset = 0; /* Where we start looking */

  printf("Searching...\n");

  while ((offset = search(fd, pattern, pattern_len, offset, 1, file_len)) != -1) {

    printf("Found pattern at offset 0x%x\n", (unsigned int)offset);

    int ofs_start = offset - pattern_ofs;
    if (ofs_start < 0 || lseek(fd, ofs_start, SEEK_SET) == (off_t)-1) {
      printf(" ** JPEG Marker not found  -- Aborting \n");
      failure++;
      continue;
    }

    for (;;) {
      unsigned char lead, marker;

      if (!read_byte(fd, &lead) || lead != 0xFF || !read_byte(fd, &marker)) {
        printf(" ** JPEG Marker not found  -- Aborting \n");
        failure++;
        break;
      }

      if (marker == 0xD8) { printf("START OF IMAGE\n"); continue; }
      if (marker == 0xD9) { printf("END OF IMAGE\n"); break; }

      /* The length field counts its own two bytes, so anything below 2 is
         corrupt and would make us seek backwards. */
      unsigned int seg_len;
      if (!read_be16(fd, &seg_len) || seg_len < 2) {
        printf(" ** Truncated or invalid segment  -- Aborting \n");
        failure++;
        break;
      }

      printf("Found Marker %02x with %u bytes data -- ", marker, seg_len);
      off_t newpos = lseek(fd, (off_t)seg_len - 2, SEEK_CUR);
      if (newpos == (off_t)-1) {
        printf("seek failed  -- Aborting \n");
        failure++;
        break;
      }
      printf("advancing to position 0x%lx\n", (unsigned long)newpos);

      if (marker != 0xDA) continue;

      printf("That was START OF SCAN -- scanning for 0xFFD9\n");

      off_t ofs_end;
      if (!find_end_of_image(fd, &ofs_end)) {
        printf(" ** End of image not found  -- Aborting \n");
        failure++;
        break;
      }

      printf("Found END OF SCAN at offset 0x%lx\n", (unsigned long)(ofs_end - 2));
      /* ofs_end is one past the FF D9 pair, so this is the exact size. */
      extract(fd, ofs_start, (int)(ofs_end - ofs_start));
      success++;
      printf("END OF IMAGE\n");
      break;
    }

  }
  printf("%d images extracted successfully, %d failures\n",success,failure);
  close(fd);
  return 0;
}
      




 

