User:The Anome/NRIS kml extractor


 * 1) Copyright (c) 2011 The Anome
 * 2) Permission is hereby granted, free of charge, to any person obtaining a copy
 * 3) of this software and associated documentation files (the "Software"), to deal
 * 4) in the Software without restriction, including without limitation the rights
 * 5) to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * 6) copies of the Software, and to permit persons to whom the Software is
 * 7) furnished to do so, subject to the following conditions:
 * 8) The above copyright notice and this permission notice shall be included in
 * 9) all copies or substantial portions of the Software.
 * 10) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * 11) IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * 12) FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * 13) AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * 14) LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * 15) OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * 16) THE SOFTWARE.

import string, re

# NOTE(review): the three patterns below are reproduced exactly as they appear
# in this copy of the script. They look as though markup-like text (presumably
# KML/HTML tags) was stripped from them at some point -- TODO confirm against
# the author's original before relying on the output.

# Pattern used to cut the document text into per-placemark chunks.
placemark_re = r"(?ms).*?"

# Pattern applied to one chunk; its five groups are unpacked, in order, as
# name, description, lon, lat and a trailing value that is not used.
fields_re = r"(?ms)^.*? (.*?) .*? (.*?) .*? (.*?),(.*?),(.*?) .*?$"

# Pattern applied to the whitespace-collapsed description. As written it
# contains literal line breaks, while the text it is matched against has every
# whitespace run collapsed to a single space, so it cannot match until the
# pattern is restored.
cdata_re = r"""(?ms)<!\[CDATA\[Historic Place Name: (.*?) Address: (.*?) City: (.*?) County: (.*?) State: (.*?)

Geographic Coordinates: Latitude: (.*?) Longitude: (.*?)

NPS Reference Number: (.*?) Date Listed: (.*?) Notes: (.*?) Type: (.*?) Geocode Match: (.*?)

A Service of: National Register of Historic Places National Park Service \]\]>"""


def _format_row(values):
    """Return *values* as one line: each item stripped, repr()-quoted, joined by ", "."""
    return ", ".join([repr(value.strip()) for value in values])


def format_placemark(placemark):
    """Return the output line for a single placemark chunk.

    The line starts with 'OK' followed by name, lat, lon and every group
    captured by ``cdata_re`` when both patterns match. Otherwise it starts
    with 'ERROR' followed by name, lat and lon (empty strings when the
    fields pattern itself found nothing).
    """
    # Defaults keep the ERROR row well-defined when nothing could be parsed;
    # without them the handler would hit unbound (or stale) names.
    name = lat = lon = ""
    try:
        name, description, lon, lat, _unused = re.findall(fields_re, placemark)[0]
        # Collapse every run of whitespace to a single space before matching.
        cdata = re.findall(cdata_re, " ".join(description.split()))[0]
    except IndexError:
        # Only "pattern found nothing" is expected here; anything else
        # (including KeyboardInterrupt) should propagate.
        return _format_row(["ERROR", name, lat, lon])
    return _format_row(["OK", name, lat, lon] + list(cdata))


def extract_rows(kml_text):
    """Yield one formatted output line per chunk that ``placemark_re`` finds in *kml_text*."""
    for placemark in re.findall(placemark_re, kml_text):
        yield format_placemark(placemark)


def main():
    """Read the relative path ``doc.kml`` and print one line per placemark chunk."""
    # The context manager closes the file; read() must be *called* so that
    # re.findall receives text rather than a bound method.
    with open("doc.kml") as kml_file:
        kml_text = kml_file.read()
    for row in extract_rows(kml_text):
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(row)


if __name__ == "__main__":
    main()