/* Copyright 2002 Daniel Egnor.  See LICENSE file.
 *
 * This program is part of the processing chain to convert TIGER/Line data
 * into an index ("map") for rapid address lookup.  It reads "step1" data as
 * generated by "geo-tiger-to-1", starts constructing a map file, and
 * writes "step2" data as consumed by "geo-2-to-2a" and "geo-2-to-3".
 *
 * Each line of the input file corresponds to a point on one side of a street:
 *
 * G12345EMain Street  //        1234-122123768 47164378
 * |<zip>|<----street--//-><--addr--><--long--><--lat-->
 * type  parity        //
 * ('G') ('E'ven or 'O'dd addresses)
 *
 * This step combines all of the points associated with the same zip code,
 * parity, and street name into a compressed zone in the map file.  For
 * each such zone a line is written giving its name and offset:
 *
 * NEMain Street  //  12345   323232312345   3232454
 * ||<----street--//-><zip><-offset-><zip><-offset->
 * |parity        //  <----start----><-----end----->
 * type ('N')
 *
 * Because records are input in sorted order, zip code and zone offset are
 * monotonically related.
 */

#include "io.h"

#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <stdio.h>

/*
 * Reads "step1" records from stdin, appends compressed address zones to the
 * map file named by argv[1], and writes one "step2" line per zone to stdout.
 *
 * Map file layout produced here:
 *   - a table of contents of three 4-byte slots (ZIP table pointer, name
 *     pointer, end pointer);
 *   - a ZIP table of 100000 4-byte offsets, one per possible ZIP code;
 *   - the zone data: runs that each start with a base entry (link to the
 *     next base entry, base longitude, base latitude, base address) followed
 *     by point records holding deltas from that base.
 *
 * Malformed, non-'G', or out-of-order input lines are skipped with a warning
 * on stderr.
 *
 * Returns 0 on success, 2 on a usage error, and 1 if the map file cannot be
 * opened or an io_out* call reports failure (negative return).
 */
int main(int argc,const char *argv[]) {
	/* Column positions and widths of a step1 record as used below. */
	enum {
		ZIP_TABLE_ENTRIES = 100000, /* one table slot per 5-digit ZIP */
		RECORD_MIN_LEN = 90,        /* shortest line accepted */
		ORDER_KEY_LEN = 78,         /* prefix used for the sort-order check */
		ZONE_KEY_LEN = 59,          /* type + zip + parity + street */
		ZIP_KEY_LEN = 6,            /* type + zip */
		ZIP_COL = 1, ZIP_LEN = 5,
		ZONE_NAME_COL = 6,          /* parity + street, echoed to step2 */
		ADDR_COL = 59, ADDR_LEN = 11,
		MARK_COL = 70,              /* byte stored verbatim with each point */
		LONG_COL = 71, LONG_LEN = 9,
		LAT_COL = 81, LAT_LEN = 8,
		POINT_KEY_LEN = 19          /* bytes compared from LONG_COL onward */
	};

	struct io_file *index;
	int zip_pointer,zip_offset;
	int name_pointer,end_pointer;
	int end;

	int last_zip;

	char line[256],prev[256] = "";
	int base_long = 0,base_lat = 0,base_addr = 0;
	int prev_addr = 0,prev_offset = 0;

	if (argc != 2) {
		fprintf(stderr,"usage: %s map-file < step1-data > step2-data\n",*argv);
		return 2;
	}

	index = io_open(argv[1]);
	if (NULL == index) return 1;

	/* Start the map file table of contents: reserve three zeroed slots and
	 * remember where each one lives so it can be patched later. */
	end = 0;
	end = io_out_i4(index,zip_pointer = end,0);
	end = io_out_i4(index,name_pointer = end,0);
	end = io_out_i4(index,end_pointer = end,0);
	if (end < 0) return 1;

	/* The ZIP code table is just an array of 100000 pointers.  Its space is
	 * reserved now; entries are filled in as ZIP codes are encountered. */
	zip_offset = end;
	if (io_out_i4(index,zip_pointer,zip_offset) < 0) return 1;
	last_zip = -1;
	end = end + ZIP_TABLE_ENTRIES * 4;

	/* Build the address zone table, one input record at a time. */
	while (NULL != fgets(line,sizeof line,stdin)) {
		int line_long,line_lat,line_addr;
		int new_zone;

		if (strlen(line) < RECORD_MIN_LEN) {
			fputs("warning: truncated input line\n",stderr);
			continue;
		}

		if (line[0] != 'G') {
			fputs("warning: invalid input record\n",stderr);
			continue;
		}

		if (strncasecmp(line,prev,ORDER_KEY_LEN) < 0) {
			fputs("warning: input line out of order\n",stderr);
			continue;
		}

		/* When writing an address zone, to save space we store a run
		 * of deltas from a base street address, latitude, and
		 * longitude.  When any delta exceeds the size that can fit
		 * in its field, we start a new run with new base values. */
		line_long = io_strntoi(line + LONG_COL,LONG_LEN) - base_long;
		line_lat = io_strntoi(line + LAT_COL,LAT_LEN) - base_lat;
		line_addr = io_strntoi(line + ADDR_COL,ADDR_LEN) / 2 - base_addr;
		/* NOTE(review): an 'X' in the mark column bumps the halved address
		 * by one; the meaning of that marker is defined by the step1
		 * producer and is not visible here. */
		if ('X' == line[MARK_COL]) ++line_addr;

		new_zone = (0 != strncasecmp(line,prev,ZONE_KEY_LEN));

		/* Explicit range tests: the address delta must fit 0..65535 and
		 * the coordinate deltas -128..127.  These accept exactly the
		 * values the former shift-based tests did, without shifting
		 * negative integers. */
		if (new_zone
		||  line_addr < 0 || line_addr > 0xFFFF
		||  line_long < -128 || line_long > 127
		||  line_lat < -128 || line_lat > 127) {
			/* If it's a new street, write a new output line. */
			if (new_zone) {
				/* New zip code -> update the zip code table */
				if (memcmp(line,prev,ZIP_KEY_LEN)) {
					const int next_zip =
						io_strntoi(line + ZIP_COL,ZIP_LEN);
					/* Fill every slot up to and including the
					 * new ZIP.  The loop is bounded so that a
					 * ZIP which parses lower than the last one,
					 * or beyond the table, can never write
					 * outside the table. */
					while (last_zip < next_zip
					    && last_zip < ZIP_TABLE_ENTRIES - 1) {
						if (io_out_i4(index,zip_offset +
						              4 * ++last_zip,end) < 0)
							return 1;
					}
				}

				/* Finish the previous zone's line with this
				 * zone's zip and offset, then begin the new
				 * zone's line. */
				if (0 != prev_offset)
					printf("%.5s%10d\n",line + ZIP_COL,end);
				printf("N%.53s%.5s%10d",line + ZONE_NAME_COL,
				       line + ZIP_COL,end);
			}

			/* Either the street changed, or the delta since the
			 * last base exceeds the maximum delta size; either
			 * way, write a new base entry.  The previous base
			 * entry's first slot is patched to point here. */
			if (0 != prev_offset
			&&  io_out_i4(index,prev_offset,end) < 0) return 1;
			end = io_out_i4(index,prev_offset = end,0);
			end = io_out_i4(index,end,base_long += line_long);
			end = io_out_i4(index,end,base_lat += line_lat);
			end = io_out_i4(index,end,base_addr += line_addr);
			if (end < 0) return 1;
			line_addr = line_long = line_lat = 0;
		}
		else if ('X' == prev[MARK_COL]
		     && line_addr == prev_addr
		     && !memcmp(line + LONG_COL,prev + LONG_COL,POINT_KEY_LEN)) {
			/* Same point as the previous 'X'-marked record: just
			 * overwrite the mark byte of the record already
			 * written instead of emitting a duplicate. */
			if (io_out(index,end - 1,line + MARK_COL,1) < 0) return 1;
			continue;
		}

		/* Write compressed delta values followed by the mark byte. */
		end = io_out_i2(index,end,line_addr);
		end = io_out_i1(index,end,line_long);
		end = io_out_i1(index,end,line_lat);
		end = io_out(index,end,line + MARK_COL,1);
		if (end < 0) return 1;

		strcpy(prev,line);
		prev_addr = line_addr;
	}

	/* Terminate the last output line */
	if (0 != prev_offset) {
		if (io_out_i4(index,prev_offset,end) < 0) return 1;
		/* NOTE(review): this offset is zero-padded with a blank zip,
		 * unlike the space-padded offsets above; left unchanged because
		 * downstream consumers may depend on it. */
		printf("     %010d\n",end);
	}

	/* Finish writing the zip code table */
	while (last_zip < ZIP_TABLE_ENTRIES - 1) {
		if (io_out_i4(index,zip_offset + 4 * ++last_zip,end) < 0)
			return 1;
	}

	/* Update the table of contents */
	if (io_out_i4(index,name_pointer,end) < 0) return 1;
	if (io_out_i4(index,end_pointer,end) < 0) return 1;
	io_close(index);
	return 0;
}
