Text Processing: grep, sed, awk
Master the essential text processing tools: grep for searching, sed for stream editing, and awk for data extraction and reporting.
📖 6 min read📅 2026-02-10Text Processing
grep — Global Regular Expression Print
Basic Usage
# Search for pattern in file
grep "error" logfile.txt
# Case-insensitive
grep -i "error" logfile.txt
# Recursive search in directory
grep -r "TODO" src/
# Show line numbers
grep -n "function" script.sh
# Count matches
grep -c "404" access.log
# Invert match (show non-matching lines)
grep -v "DEBUG" app.log
# Show only matching part
grep -o "error[0-9]*" logfile.txt
# Multiple patterns
grep -E "error|warning|critical" logfile.txt
# Or:
grep "error\|warning\|critical" logfile.txt
grep with Regular Expressions
# Extended regex (-E or egrep)
grep -E "^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" access.log
# Match beginning of line
grep "^ERROR" logfile.txt
# Match end of line
grep "\.conf$" file_list.txt
# Match whole words only
grep -w "error" logfile.txt # Matches "error" not "errors"
# Match files containing pattern
grep -l "TODO" *.py # List filenames only
grep -L "TODO" *.py # List files NOT matching
# Context: lines before/after match
grep -B 3 "error" logfile.txt # 3 lines before
grep -A 5 "error" logfile.txt # 5 lines after
grep -C 2 "error" logfile.txt # 2 lines before and after
Practical grep Examples
# Find IP addresses in logs
grep -oE "\b[0-9]{1,3}(\.[0-9]{1,3}){3}\b" access.log
# Find TODO/FIXME comments in code
grep -rn "TODO\|FIXME\|HACK\|XXX" --include="*.py" src/
# Count unique errors
grep -i "error" app.log | sort -u | wc -l
# Search compressed files
zgrep "pattern" file.gz
sed — Stream Editor
Basic Substitution
# Replace first occurrence per line
sed 's/old/new/' file.txt
# Replace ALL occurrences per line (global)
sed 's/old/new/g' file.txt
# Case-insensitive replace
sed 's/old/new/gi' file.txt
# Replace in-place (modify file)
sed -i 's/old/new/g' file.txt
# In-place with backup
sed -i.bak 's/old/new/g' file.txt
Line Operations
# Print specific lines
sed -n '5p' file.txt # Line 5
sed -n '5,10p' file.txt # Lines 5-10
sed -n '1~2p' file.txt # Odd lines
sed -n '2~2p' file.txt # Even lines
# Delete lines
sed '5d' file.txt # Delete line 5
sed '5,10d' file.txt # Delete lines 5-10
sed '/pattern/d' file.txt # Delete matching lines
sed '/^$/d' file.txt # Delete empty lines
sed '/^#/d' file.txt # Delete comment lines
# Insert/Append
sed '3i\New line before 3' file.txt # Insert before line 3
sed '3a\New line after 3' file.txt # Append after line 3
sed '$a\Last line' file.txt # Append at endAdvanced sed
# Multiple operations
sed -e 's/foo/bar/g' -e 's/baz/qux/g' file.txt
# Using different delimiters (useful for paths)
sed 's|/usr/local|/opt|g' config.txt
sed 's#http://#https://#g' urls.txt
# Capture groups and back-references
# Swap first and last name
sed 's/\(\w\+\) \(\w\+\)/\2, \1/' names.txt
# With extended regex:
sed -E 's/(\w+) (\w+)/\2, \1/' names.txt
# Replace between patterns
sed '/START/,/END/s/old/new/g' file.txt
# Print matches with substitution
echo "Error: file not found" | sed -n 's/Error: //p'
Practical sed Examples
# Remove trailing whitespace
sed 's/[[:space:]]*$//' file.txt
# Add line numbers
sed = file.txt | sed 'N;s/\n/\t/'
# Convert DOS to Unix line endings
sed -i 's/\r$//' file.txt
# Extract value from key=value config
sed -n 's/^database_host=//p' config.ini
# Comment out lines matching pattern
sed '/pattern/s/^/# /' config.txt
awk — Pattern Scanning and Processing
Basic Usage
# Print specific columns
awk '{print $1}' file.txt # First column
awk '{print $1, $3}' file.txt # First and third columns
awk '{print $NF}' file.txt # Last column
# Custom field separator
awk -F: '{print $1, $3}' /etc/passwd
awk -F, '{print $1, $2}' data.csv
# Pattern matching
awk '/error/ {print}' logfile.txt
awk '$3 > 100 {print $1, $3}' data.txt
Built-in Variables
# NR = Record (line) number
awk '{print NR, $0}' file.txt
# NF = Number of fields
awk '{print NF, $0}' file.txt
# FS = Field separator
awk 'BEGIN{FS=","} {print $1}' data.csv
# OFS = Output field separator
awk -F: 'BEGIN{OFS="\t"} {print $1, $3}' /etc/passwd
# RS = Record separator (default: newline)
# ORS = Output record separator
Patterns and Actions
# BEGIN and END blocks
awk '
BEGIN { print "=== Report ===" }
{ print NR": "$0 }
END { print "Total lines: "NR }
' file.txt
# Conditional patterns
awk '$3 > 1000 {print $1, $3}' data.txt
awk 'NR >= 5 && NR <= 10' file.txt
awk '/start/,/end/' file.txt # Range pattern
# If/else
awk '{
if ($3 > 90)
print $1, "A"
else if ($3 > 80)
print $1, "B"
else
print $1, "C"
}' grades.txt
Calculations
# Sum a column
awk '{sum += $3} END {print "Total:", sum}' data.txt
# Average
awk '{sum += $3; count++} END {print "Avg:", sum/count}' data.txt
# Max (for min, initialize the same way and use < instead of >)
awk 'NR==1 || $3 > max {max=$3} END {print "Max:", max}' data.txt
# Count occurrences
awk '{count[$1]++} END {for (k in count) print k, count[k]}' access.log
Practical awk Examples
# Process /etc/passwd
awk -F: '$3 >= 1000 {printf "%-20s UID: %s\n", $1, $3}' /etc/passwd
# Disk usage report
df -h | awk 'NR>1 {printf "%-20s %s used of %s\n", $6, $5, $2}'
# CSV to formatted table
awk -F, '
NR==1 {
for(i=1;i<=NF;i++) header[i]=$i
next
}
{
for(i=1;i<=NF;i++) printf "%s: %s\n", header[i], $i
print "---"
}' data.csv
# Web server log analysis
awk '{
status[$9]++
}
END {
for (code in status)
printf "%s: %d requests\n", code, status[code]
}' access.log | sort -t: -k2 -rn
Combining grep, sed, and awk
# Extract and format data from a log file
grep "ERROR" app.log |
sed 's/.*\[ERROR\] //' |
awk -F: '{count[$1]++} END {
for (k in count)
printf "%-30s %d occurrences\n", k, count[k]
}' |
sort -k2 -rn
# Process configuration file
grep -v "^#\|^$" config.ini |
sed 's/[[:space:]]*=[[:space:]]*/=/' |
awk -F= '{printf "%-20s → %s\n", $1, $2}'
Exercises
- Use grep to find all functions defined in a Bash script file
- Use sed to convert a CSV file to a Markdown table
- Use awk to calculate total and average from a numeric column
- Build a log analyzer that reports top IPs, most common errors, and traffic per hour
- Create a one-liner that processes /etc/passwd and shows only human user accounts
Next: Process Management — control running processes!