Strings in Data Structures & Algorithms: Complete Guide
Master string manipulation, pattern matching, and algorithm optimization. This tutorial covers string operations, encoding techniques, and common interview patterns with implementation examples.
String Operation Distribution in Real Systems
Search (35%)
Comparison (25%)
Transformation (20%)
Encoding (20%)
1. String Fundamentals
Core Characteristics:
Immutable: Original string cannot be changed (in most languages)
Character Encoding: ASCII (1 byte), Unicode (2-4 bytes)
# String reversal
def reverse_string(s):
return s[::-1]
# Palindrome check
def is_palindrome(s):
cleaned = ''.join(c.lower() for c in s if c.isalnum())
return cleaned == reverse_string(cleaned)
// Remove duplicates in-place
function removeDuplicates(str) {
const chars = str.split('');
let slow = 0;
for (let fast = 0; fast < chars.length; fast++) {
if (chars[fast] !== chars[slow]) {
chars[++slow] = chars[fast];
}
}
return chars.slice(0, slow + 1).join('');
}
# Remove duplicates in-place
def remove_duplicates(s):
chars = list(s)
slow = 0
for fast in range(len(chars)):
if chars[fast] != chars[slow]:
slow += 1
chars[slow] = chars[fast]
return ''.join(chars[:slow+1])
// Remove duplicates in-place
String removeDuplicates(String str) {
char[] chars = str.toCharArray();
int slow = 0;
for (int fast = 0; fast < chars.length; fast++) {
if (chars[fast] != chars[slow]) {
chars[++slow] = chars[fast];
}
}
return new String(chars, 0, slow + 1);
}
// Remove duplicates in-place
std::string removeDuplicates(const std::string& str) {
if (str.empty()) return "";
std::string chars = str;
int slow = 0;
for (int fast = 0; fast < chars.size(); fast++) {
if (chars[fast] != chars[slow]) {
chars[++slow] = chars[fast];
}
}
return chars.substr(0, slow + 1);
}
Sliding Window
// Longest substring without repeating characters
function longestUniqueSubstring(s) {
const map = new Map();
let max = 0, start = 0;
for (let end = 0; end < s.length; end++) {
if (map.has(s[end])) {
start = Math.max(start, map.get(s[end]) + 1);
}
map.set(s[end], end);
max = Math.max(max, end - start + 1);
}
return max;
}
# Longest substring without repeating characters
def longest_unique_substring(s):
index_map = {}
max_len = start = 0
for end, char in enumerate(s):
if char in index_map:
start = max(start, index_map[char] + 1)
index_map[char] = end
max_len = max(max_len, end - start + 1)
return max_len
// Longest substring without repeating characters
int longestUniqueSubstring(String s) {
Map map = new HashMap<>();
int maxLen = 0, start = 0;
for (int end = 0; end < s.length(); end++) {
char c = s.charAt(end);
if (map.containsKey(c)) {
start = Math.max(start, map.get(c) + 1);
}
map.put(c, end);
maxLen = Math.max(maxLen, end - start + 1);
}
return maxLen;
}
// Longest substring without repeating characters
#include <unordered_map>
#include <string>
#include <algorithm>
int longestUniqueSubstring(const std::string& s) {
std::unordered_map<char, int> map;
int maxLen = 0, start = 0;
for (int end = 0; end < s.size(); end++) {
char c = s[end];
if (map.count(c)) {
start = std::max(start, map[c] + 1);
}
map[c] = end;
maxLen = std::max(maxLen, end - start + 1);
}
return maxLen;
}
Common Problem Types:
Anagram Detection: Frequency counting
String Compression: Run-length encoding
Parentheses Validation: Stack-based
Pattern Matching: KMP algorithm
3. Advanced String Algorithms
Pattern Matching:
Knuth-Morris-Pratt (KMP)
// Build LPS array for KMP
function computeLPS(pattern) {
const lps = new Array(pattern.length).fill(0);
let len = 0, i = 1;
while (i < pattern.length) {
if (pattern[i] === pattern[len]) {
lps[i++] = ++len;
} else {
if (len !== 0) len = lps[len - 1];
else lps[i++] = 0;
}
}
return lps;
}
# Build LPS array for KMP
def compute_lps(pattern):
lps = [0] * len(pattern)
length, i = 0, 1
while i < len(pattern):
if pattern[i] == pattern[length]:
length += 1
lps[i] = length
i += 1
else:
if length != 0:
length = lps[length - 1]
else:
lps[i] = 0
i += 1
return lps
// Build LPS array for KMP
int[] computeLPS(String pattern) {
int n = pattern.length();
int[] lps = new int[n];
int len = 0, i = 1;
while (i < n) {
if (pattern.charAt(i) == pattern.charAt(len)) {
lps[i++] = ++len;
} else {
if (len != 0) len = lps[len - 1];
else lps[i++] = 0;
}
}
return lps;
}
#include <vector>
#include <string>
using namespace std;
// Build LPS array for KMP
vector<int> computeLPS(const string& pattern) {
vector<int> lps(pattern.size(), 0);
int len = 0, i = 1;
while (i < pattern.size()) {
if (pattern[i] == pattern[len]) {
lps[i++] = ++len;
} else {
if (len != 0) len = lps[len - 1];
else lps[i++] = 0;
}
}
return lps;
}
Rabin-Karp (Rolling Hash)
// Rolling hash implementation
function rabinKarp(text, pattern) {
const prime = 101;
const d = 256;
const M = pattern.length;
const N = text.length;
let p = 0, t = 0, h = 1;
// Calculate hash value for pattern and first window
for (let i = 0; i < M; i++) {
p = (d * p + pattern.charCodeAt(i)) % prime;
t = (d * t + text.charCodeAt(i)) % prime;
if (i < M - 1) h = (h * d) % prime;
}
// Slide the pattern over text
for (let i = 0; i <= N - M; i++) {
if (p === t) {
// Check characters one by one
let j;
for (j = 0; j < M; j++) {
if (text[i + j] !== pattern[j]) break;
}
if (j === M) return i;
}
// Calculate hash for next window
if (i < N - M) {
t = (d * (t - text.charCodeAt(i) * h) +
text.charCodeAt(i + M)) % prime;
if (t < 0) t += prime;
}
}
return -1;
}
# Rolling hash implementation
def rabin_karp(text, pattern):
prime = 101
d = 256
M = len(pattern)
N = len(text)
p = t = 0
h = 1
for i in range(M):
p = (d * p + ord(pattern[i])) % prime
t = (d * t + ord(text[i])) % prime
if i < M - 1:
h = (h * d) % prime
for i in range(N - M + 1):
if p == t:
if text[i:i+M] == pattern:
return i
if i < N - M:
t = (d * (t - ord(text[i]) * h) + ord(text[i + M])) % prime
if t < 0:
t += prime
return -1
// Rolling hash implementation
int rabinKarp(String text, String pattern) {
int prime = 101, d = 256;
int M = pattern.length(), N = text.length();
int p = 0, t = 0, h = 1;
for (int i = 0; i < M; i++) {
p = (d * p + pattern.charAt(i)) % prime;
t = (d * t + text.charAt(i)) % prime;
if (i < M - 1) h = (h * d) % prime;
}
for (int i = 0; i <= N - M; i++) {
if (p == t) {
int j;
for (j = 0; j < M; j++) {
if (text.charAt(i + j) != pattern.charAt(j)) break;
}
if (j == M) return i;
}
if (i < N - M) {
t = (d * (t - text.charAt(i) * h) + text.charAt(i + M)) % prime;
if (t < 0) t += prime;
}
}
return -1;
}
#include <string>
using namespace std;
// Rolling hash implementation
int rabinKarp(const string& text, const string& pattern) {
const int prime = 101, d = 256;
int M = pattern.size(), N = text.size();
int p = 0, t = 0, h = 1;
for (int i = 0; i < M; i++) {
p = (d * p + pattern[i]) % prime;
t = (d * t + text[i]) % prime;
if (i < M - 1) h = (h * d) % prime;
}
for (int i = 0; i <= N - M; i++) {
if (p == t) {
int j;
for (j = 0; j < M; j++) {
if (text[i + j] != pattern[j]) break;
}
if (j == M) return i;
}
if (i < N - M) {
t = (d * (t - text[i] * h) + text[i + M]) % prime;
if (t < 0) t += prime;
}
}
return -1;
}
class TrieNode:
def __init__(self):
self.children = {}
self.is_end = False
class TrieNode {
Map<Character, TrieNode> children;
boolean isEnd;
TrieNode() {
children = new HashMap<>();
isEnd = false;
}
}
#include <unordered_map>
using namespace std;
class TrieNode {
public:
unordered_map<char, TrieNode*> children;
bool isEnd;
TrieNode() : isEnd(false) {}
};
Run-Length Encoding
Basic compression technique
function runLengthEncode(str) {
let result = '';
let count = 1;
for (let i = 1; i <= str.length; i++) {
if (i < str.length && str[i] === str[i - 1]) {
count++;
} else {
result += str[i - 1] + count;
count = 1;
}
}
return result;
}
def run_length_encode(s):
result = ''
count = 1
for i in range(1, len(s)+1):
if i < len(s) and s[i] == s[i-1]:
count += 1
else:
result += s[i-1] + str(count)
count = 1
return result
String runLengthEncode(String str) {
StringBuilder result = new StringBuilder();
int count = 1;
for (int i = 1; i <= str.length(); i++) {
if (i < str.length() && str.charAt(i) == str.charAt(i-1)) {
count++;
} else {
result.append(str.charAt(i-1)).append(count);
count = 1;
}
}
return result.toString();
}
#include <string>
using namespace std;
string runLengthEncode(const string& str) {
string result = "";
int count = 1;
for (size_t i = 1; i <= str.length(); i++) {
if (i < str.length() && str[i] == str[i-1]) {
count++;
} else {
result += str[i-1];
result += to_string(count);
count = 1;
}
}
return result;
}
String Problem-Solving Checklist
Handle empty string and single character cases
Consider case sensitivity and Unicode
Optimize for time vs space constraints
Test edge cases (spaces, punctuation, numbers)
Algorithm Expert Insight: String manipulation accounts for 30% of technical interview questions. Mastering pattern matching and two-pointer techniques can significantly improve problem-solving efficiency.
0 Interaction
0 Views
0 Likes
You need to be logged in to participate in this discussion.