# Name and version of this ruleset. created_by() combines the two into the
# value that is written to the "created_by" tag of every modified element.
__rulesetname__ = "preen"
__version__ = "0.7"
# Author contact for this ruleset.
__author__ = "Ted Percival <ted@midg3t.net>"

# Purpose:
# 1. Align TIGER-generated tags with OpenStreetMap and geo-local conventions.
# 2. Derive tags from TIGER- & GNIS-sourced names.
#
# In other words, preen the data.
#
# How to use:
# Get change_tags.py from
# http://trac.openstreetmap.org/browser/applications/utils/change_tags/
# and use the --module option to specify this module (preen).
#
# This ruleset was formerly known as gridstreetnames, but with the addition
# of non-streetname-related functionality it was renamed to preen.

import re

def preen(tags, type):
    """Run every preen rule against the tags of a single element.

    The street name rules run first, followed by the tag detection rules,
    always in the order listed below. Every rule is run even when an
    earlier one has already reported a change; each rule edits ``tags``
    in place when it applies.

    Returns True if at least one rule reported a change, otherwise False.
    """
    rules = (
        # Street name munging
        strip_st_from_grid_streets,
        collapse_whitespace,
        expand_directions,
        streetname_suffixes,
        # Tag detection
        add_highway_refs,
        tag_places_of_worship,
    )

    # Build the complete list of results (not a lazy generator) so that no
    # rule is skipped once an earlier rule has returned a true value.
    outcomes = [rule(tags, type) for rule in rules]
    return any(outcomes)

def created_by():
    """Return the "created_by" tag value naming this ruleset and its version."""
    identity = (__rulesetname__, __version__)
    return "change_tags.py: %s %s" % identity

def strip_st_from_grid_streets(tags, type):
    """For grid streets with names such as "South 400 West St", remove
       the "St" or "Street" suffix because it is not used.

       Only ways that carry both a "highway" and a "name" tag are examined.

       Returns True if the name was rewritten (in which case "created_by"
       is stamped as well), otherwise False. Like the other rules in this
       module, the "nothing changed" result is an explicit False rather
       than an implicit None.
    """

    if type != "way" or "highway" not in tags or "name" not in tags:
        return False

    # Group 1 is the leading direction, group 6 the grid coordinate and
    # group 7 the trailing direction; the remaining groups only hold the
    # optional long-form endings ("orth", "outh", ...) and "reet".
    # The replacement also normalises the separators to single spaces.
    new_name = re.sub(
        r'\b(N(orth)?|S(outh)?|E(ast)?|W(est)?) +(\w+) +(N(orth)?|S(outh)?|E(ast)?|W(est)?) +St(reet)?$',
        r'\1 \6 \7',
        tags["name"])
    if new_name == tags["name"]:
        return False

    tags["name"] = new_name
    tags["created_by"] = created_by()
    return True

def expand_directions(tags, type):
    """Spell out single-letter compass directions in the name of a way.

    A standalone "N", "S", "E" or "W" anywhere in the "name" tag is
    replaced by "North", "South", "East" or "West", so "S 400 West"
    becomes "South 400 West".

    Returns True if the name was changed (and "created_by" stamped),
    otherwise False.
    """
    if type != "way" or "name" not in tags:
        return False

    expansions = (
        (r'\bN\b', r'North'),
        (r'\bS\b', r'South'),
        (r'\bE\b', r'East'),
        (r'\bW\b', r'West'),
    )

    original_name = tags["name"]
    expanded_name = original_name
    # The four substitutions are independent of one another: no expansion
    # produces a standalone letter that another pattern could match.
    for abbreviation, full_word in expansions:
        expanded_name = re.sub(abbreviation, full_word, expanded_name)

    if expanded_name == original_name:
        return False

    tags["name"] = expanded_name
    tags["created_by"] = created_by()
    return True

def collapse_whitespace(tags, type):
    """Squeeze every run of two or more spaces in the "name" tag down to
       one space.

       The element type is not consulted, so this applies to any object
       that has a "name" tag.

       Returns True if the name was changed (and "created_by" stamped),
       otherwise False.
    """
    if "name" not in tags:
        return False

    current_name = tags["name"]
    squeezed_name = re.sub(r'  +', r' ', current_name)
    if squeezed_name == current_name:
        return False

    tags["name"] = squeezed_name
    tags["created_by"] = created_by()
    return True

def streetname_suffixes(tags, type):
    """Spell out an abbreviated street type at the end of a way's name,
    for example "Main St" -> "Main Street" and "5th Ave" -> "5th Avenue".

    Returns True if the name was changed (and "created_by" stamped),
    otherwise False.
    """
    if type != "way" or "name" not in tags:
        return False

    # Abbreviations taken from
    # `grep name_type area.osm | sort -u`
    # "Loop", "Run" and "Way" are deliberately absent: they are already
    # written out in full, so there is nothing to expand.
    expansions = {
        "Aly":  "Alley",
        "Ave":  "Avenue",
        "Blvd": "Boulevard",
        "Brg":  "Bridge",
        "Cir":  "Circle",
        "Ct":   "Court",
        "Cv":   "Cove",
        "Dr":   "Drive",
        "Expy": "Expressway",
        "Hwy":  "Highway",
        "Ln":   "Lane",
        "Mal":  "Mall",
        "Pky":  "Parkway",
        "Pl":   "Place",
        "Rd":   "Road",
        "St":   "Street",
        "Ter":  "Terrace",
        "Tpke": "Turnpike",
        "Trl":  "Trail",
        "Xing": "Crossing",
    }

    current_name = tags["name"]
    for abbreviation, full_word in expansions.items():
        # Anchored at the end: only an abbreviation that is the last word
        # of the name (preceded by a space) is expanded.
        expanded_name = re.sub(" " + abbreviation + "$",
                               " " + full_word,
                               current_name)
        if expanded_name != current_name:
            tags["name"] = expanded_name
            tags["created_by"] = created_by()
            # A name ends in at most one suffix, so stop at the first hit.
            return True

    return False

def add_highway_refs(tags, type):
    """Detect highway reference numbers from TIGER-imports.
       These tend to be in a supplementary name tag (eg "name_1" or "name 1")
       containing a string like "State Route nn" or "United States Highway nn"
       Iff there is no "ref" tag, it is added based on the detected
       reference(s). There may be multiple references, which are joined
       with ";" and listed with US-* references before SR-* references.

       Only ways that have a "highway" tag are examined.

       Returns True if a "ref" tag was added (and "created_by" stamped),
       otherwise False.
    """

    # Skip elements we don't care about
    # In particular, if the way already has a "ref" then we don't touch it.
    if type != "way" or "highway" not in tags or "ref" in tags:
        return False

    us_refs = []
    sr_refs = []
    # Walk the keys in sorted order so the result does not depend on the
    # dict's iteration order. Iterating the keys (instead of calling
    # iteritems()) works on both Python 2 and Python 3.
    for key in sorted(tags):
        if not key.startswith("name"): # Include keys like "name_1" or "name 1"
            continue
        val = tags[key]

        match = re.match(r"State Route (\w+)", val)
        if match:
            sr_refs.append("SR-%s" % match.group(1))

        match = re.match(r"United States Highway (\w+)", val)
        if match:
            us_refs.append("US-%s" % match.group(1))

    # US-* sorts before SR-*.
    # TODO: detect interstates and put I-* before US-*
    new_refs = us_refs + sr_refs
    if not new_refs:
        return False

    tags["ref"] = ";".join(new_refs)
    tags["created_by"] = created_by()
    return True

def tag_places_of_worship(tags, type):
    """Derive "religion" and "denomination" tags for places of worship
       (as found in the GNIS import) from keywords in the "name" tag.

       Only elements tagged amenity=place_of_worship are considered; the
       element type is not consulted. The first rule whose search term
       occurs in the lower-cased name decides the outcome. Existing
       "religion" and "denomination" tags are never overwritten, and a
       denomination is only added when the element's religion equals the
       rule's religion.

       Returns True if a tag was added (and "created_by" stamped),
       otherwise False.
    """

    # Originally based on the GNIS data:
    # grep '|Church|' UT_Features_20090203.txt  | cut -d'|' -f2 | sort -u
    rules = (
        # lowercase_search_term, religion,      denomination
        ("apostolic",           "christian",    "apostolic"),
        ("baptist",             "christian",    "baptist"),
        ("catholic",            "christian",    "catholic"),
        ("christ scientist",    "christian",    "christ_scientist"),
        ("christian science",   "christian",    "christ_scientist"),
        ("cogic",               "christian",    "pentecostal"),
        ("episcopal",           "christian",    "anglican"),
        ("foursquare",          "christian",    "foursquare"),
        ("greek orthodox",      "christian",    "greek_orthodox"),
        ("jehovah",             "christian",    "jehovahs_witness"),
        ("latter day saints",   "christian",    "mormon"),
        ("lutheran",            "christian",    "lutheran"),
        ("methodist",           "christian",    "methodist"),
        ("pentecostal",         "christian",    "pentecostal"),
        ("presbyterian",        "christian",    "presbyterian"),
        ("seventh day adventist", "christian",  "seventh_day_adventist"),
        ("wesleyan",            "christian",    "methodist"),
        # Matches that give a religion but no denomination
        ("buddhist",            "buddhist",     None),
        ("christian",           "christian",    None),
        ("islamic",             "muslim",       None),
        ("scientology",         "scientologist", None),
        # "church" is a broad term, but because only
        # amenity=place_of_worship elements are examined it should be
        # reasonably accurate. It must stay *after* "scientology" so that
        # a name containing both is tagged as scientologist.
        ("church",              "christian",    None),
        ("synagogue",           "jewish",       None),
    )

    if "amenity" not in tags or tags["amenity"] != "place_of_worship":
        return False
    if "name" not in tags:
        return False

    lowered_name = tags["name"].lower()
    # Only the first matching rule is applied; later ones are not consulted.
    hit = next((rule for rule in rules if rule[0] in lowered_name), None)
    if hit is None:
        return False

    religion, denomination = hit[1], hit[2]
    modified = False

    if "religion" not in tags:
        tags["religion"] = religion
        modified = True

    # Never attach a denomination to an element whose (possibly
    # pre-existing) religion differs from the one this rule implies.
    if denomination is not None and \
            "denomination" not in tags and tags["religion"] == religion:
        tags["denomination"] = denomination
        modified = True

    if modified:
        tags["created_by"] = created_by()

    return modified

# vim: ts=4 sw=4 et
