10. String Operations

10.1. Introduction

10.2. Learning Objectives

10.3. Creating and Manipulating Strings

10.3.1. Creating Strings

# Different ways to create strings. Single and double quotes produce the
# same kind of string; triple quotes allow the text to span several lines.
location_name = "Mount Everest"  # Using double quotes
country = 'Nepal'  # Using single quotes
description = """Mount Everest is the highest peak
in the world, located in the Himalayas."""  # Multi-line string (triple quotes)

print(f"Location: {location_name}")
print(f"Country: {country}")
print(f"Description: {description}")

10.3.2. String Concatenation

# Glue the place name and its country together with the + operator,
# one piece at a time
location_full = location_name
location_full = location_full + ", "
location_full = location_full + country
print(f"Full location: {location_full}")

# Assemble a relative file path from a folder name and a file name
filename = "mountain_peaks.csv"
data_folder = "geographic_data"
path_separator = "/"
file_path = data_folder + path_separator + filename
print(f"File path: {file_path}")

10.3.3. String Repetition

# Repeat "-" 30 times to get a horizontal rule, then print it above and
# below the report title
separator = 30 * "-"
for report_line in (separator, "Geographic Data Report", separator):
    print(report_line)

# Four repeated spaces act as a fixed gap between each label and its value
tab_space = 4 * " "
print("Location:" + tab_space + location_name)
print("Elevation:" + tab_space + "8,848 meters")

10.3.4. String Length and Basic Properties

# len() returns the number of characters in the string
location_length = len(location_name)
length_report = (
    f"The location name '{location_name}' has {location_length} characters"
)
print(length_report)

# isalpha() is True only when every character is a letter
# (a space or a digit anywhere in the string makes it False)
city_name = "SanFrancisco"
is_alphabetic = city_name.isalpha()
print(f"Is '{city_name}' alphabetic? {is_alphabetic}")

# isdigit() is True only when every character is a digit, which suits
# codes such as postal codes; a sign or decimal point makes it False
zip_code = "94102"
is_numeric = zip_code.isdigit()
print(f"Is '{zip_code}' numeric? {is_numeric}")

10.3.5. Building Dynamic Content

# Numeric facts about the location used to build a description
latitude, longitude = 27.9881, 86.9250
elevation = 8848

# Numbers must be converted with str() before they can be joined to text
coordinate_text = str(latitude) + ", " + str(longitude)
location_info = location_name + " is located at coordinates " + coordinate_text
print(location_info)

# Geographic summary: name the country, then list its major cities
# as one comma-separated string
cities = ["Kathmandu", "Pokhara", "Lalitpur"]
city_listing = ", ".join(cities)
summary = "Major cities in " + country + " include: " + city_listing
print(summary)

10.4. String Methods for Geospatial Data

10.4.1. Case Conversion Methods

# Show the same name under each of the case-conversion methods
mountain_name = "Mount Everest"

# Each entry pairs an output label with the converted text
case_variants = (
    ("Original", mountain_name),
    ("Uppercase", mountain_name.upper()),
    ("Lowercase", mountain_name.lower()),
    ("Title case", mountain_name.title()),
    ("Capitalize", mountain_name.capitalize()),
)
for case_label, case_text in case_variants:
    print(f"{case_label}: {case_text}")

10.4.2. Whitespace Removal Methods

# Sample strings padded with unwanted spaces on one or both sides
messy_location, messy_left, messy_right = (
    "   San Francisco   ",
    "   Los Angeles",
    "Chicago   ",
)

print(f"Original: '{messy_location}'")

both_trimmed = messy_location.strip()  # spaces removed from both ends
left_trimmed = messy_left.lstrip()  # leading spaces removed
right_trimmed = messy_right.rstrip()  # trailing spaces removed

print(f"strip(): '{both_trimmed}'")
print(f"lstrip(): '{left_trimmed}'")
print(f"rstrip(): '{right_trimmed}'")

10.4.3. String Replacement

# Swap one peak name for another with str.replace()
location = "Mount Everest, Nepal"
old_peak, new_peak = "Everest", "Kilimanjaro"
updated_location = location.replace(old_peak, new_peak)
print(f"Original: {location}")
print(f"Updated: {updated_location}")

# replace() substitutes every match, so both "raw_data/" segments are removed
path_string = "data/raw_data/geographic_data/raw_data/points.csv"
redundant_segment = "raw_data/"
clean_path = path_string.replace(redundant_segment, "")
print(f"Original path: {path_string}")
print(f"Clean path: {clean_path}")

10.4.4. String Splitting

# Break a comma-separated place description into a list of parts
location_full = "Mount Everest, Nepal, Asia"
location_parts = location_full.split(", ")
print(f"Original: {location_full}")
print(f"Split into parts: {location_parts}")

# Name each part by its position in the list
mountain = location_parts[0]
country = location_parts[1]
continent = location_parts[2]
named_parts = (
    ("Mountain", mountain),
    ("Country", country),
    ("Continent", continent),
)
for part_label, part_value in named_parts:
    print(f"{part_label}: {part_value}")

# Parse a "lat,lon" string: split on the comma, then convert each half
coordinate_string = "40.7128,-74.0060"
lat_str, lon_str = coordinate_string.split(",")
latitude, longitude = float(lat_str), float(lon_str)
print(f"Parsed coordinates: Lat={latitude}, Lon={longitude}")

# Split a path on "/"; the final piece is the file name
file_path = "data/geographic/cities/world_cities.csv"
path_components = file_path.split("/")
last_component = path_components[-1]
print(f"Path components: {path_components}")
print(f"Filename: {last_component}")

10.4.5. String Joining

# Merge several city names into one comma-separated string
city_names = ["San Francisco", "New York", "Tokyo"]
list_separator = ", "
city_name = list_separator.join(city_names)
print(f"Joined city name: {city_name}")

# Build a file path by joining its components with "/"
path_parts = ["data", "geographic", "elevation", "dem.tif"]
path_delimiter = "/"
full_path = path_delimiter.join(path_parts)
print(f"Full path: {full_path}")

# Practical example: turn separate latitude/longitude strings into "lat,lon"
coordinates = ["40.7128", "-74.0060"]
coordinate_delimiter = ","
coordinate_string = coordinate_delimiter.join(coordinates)
print(f"Coordinate string: {coordinate_string}")

10.5. String Formatting

10.5.2. Formatting Numbers in Strings

# High-precision coordinates to be shortened for display
precise_lat = 40.712776
precise_lon = -74.005974

# The ".Nf" format spec rounds a float to N decimal places
lat_2dp, lon_2dp = f"{precise_lat:.2f}", f"{precise_lon:.2f}"
lat_4dp, lon_4dp = f"{precise_lat:.4f}", f"{precise_lon:.4f}"
coords_2_places = f"Coordinates: ({lat_2dp}, {lon_2dp})"
coords_4_places = f"Coordinates: ({lat_4dp}, {lon_4dp})"

for coords_line in (coords_2_places, coords_4_places):
    print(coords_line)

# The "," format spec inserts thousands separators into large numbers
population = 8336817
area_sqkm = 783.8

population_text = f"{population:,} people"
area_text = f"{area_sqkm:.1f} km²"
formatted_stats = f"NYC Population: {population_text}, Area: {area_text}"
print(formatted_stats)

10.5.3. Legacy Formatting Methods

# str.format(): fill placeholders in a template string with values
location = "San Francisco"
lat = 37.7749
lon = -122.4194
format_args = (location, lat, lon)

# Empty braces are filled in argument order
formatted_1 = "Location: {} at coordinates ({}, {})".format(*format_args)
print(formatted_1)

# Numbered braces pick arguments by their position
formatted_2 = "Location: {0} at coordinates ({1}, {2})".format(*format_args)
print(formatted_2)

# Named braces are matched to keyword arguments
format_fields = {"name": location, "latitude": lat, "longitude": lon}
formatted_3 = "Location: {name} at coordinates ({latitude}, {longitude})".format(
    **format_fields
)
print(formatted_3)

10.5.4. Practical Formatting Examples

# Build a file name that encodes the survey date and position
import datetime

current_time = datetime.datetime.now()
survey_lat = 45.3311
survey_lon = -121.7113

date_stamp = current_time.strftime("%Y%m%d")
lat_tag = f"{survey_lat:.4f}N"
# abs() drops the minus sign; the "W" suffix is appended as fixed text
lon_tag = f"{abs(survey_lon):.4f}W"
filename = f"survey_{date_stamp}_{lat_tag}_{lon_tag}.csv"
print(f"Generated filename: {filename}")

# Well-Known Text (WKT) point, written here with longitude before latitude
wkt_point = "POINT(" + str(survey_lon) + " " + str(survey_lat) + ")"
print(f"WKT Point: {wkt_point}")
# Building SQL queries with formatting
table_name = "cities"
min_population = 1000000
region = "North America"

# The population threshold is interpolated as a plain integer: a thousands
# separator ("1,000,000") is not a valid numeric literal in SQL.
# NOTE: string-built SQL is shown only to illustrate formatting. When any
# value comes from user input, pass it as a query parameter through the
# database driver instead of formatting it into the statement.
sql_query = f"""SELECT name, latitude, longitude
FROM {table_name}
WHERE population > {min_population}
AND region = '{region}'"""

print("Generated SQL Query:")
print(sql_query)

10.6. String Operation Decision Guide

10.6.1. When to Use Each Operation

10.7. Key Takeaways

10.8. Exercises

10.8.1. Exercise 1: Manipulating Geographic Location Strings

10.8.2. Exercise 2: Extracting and Formatting Coordinates

10.8.3. Exercise 3: Building Dynamic SQL Queries

10.8.4. Exercise 4: String Normalization and Cleaning

10.8.5. Exercise 5: Parsing and Extracting Address Information