-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathword-count.c
More file actions
133 lines (118 loc) · 3.19 KB
/
word-count.c
File metadata and controls
133 lines (118 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "kvlist.h"
#include "mr.h"
/**
* `toLowerStr` is a helper function that turns the string lowercased
*/
char *toLowerStr(char *s) {
for (char *p = s; *p; p++) {
*p = tolower(*p);
}
return s;
}
/**
* `mapper` is a function to be passed to `map_reduce`
*
* `pair->value` has one line of the text.
* `mapper` needs to read the line into words and and
* add ($WORD, "1") to the `output` list.
*/
void mapper(kvpair_t *pair, kvlist_t *output) {
char delim[] = "\t\r\n !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
char *token;
char *saveptr;
// use `strtok_r` to split the line into words and add pairs to `output`
token = strtok_r(pair->value, delim, &saveptr);
if (token == NULL) {
return;
}
kvlist_append(output, kvpair_new(toLowerStr(token), "1"));
while ((token = strtok_r(NULL, delim, &saveptr)) != NULL) {
kvlist_append(output, kvpair_new(toLowerStr(token), "1"));
}
}
/**
* `reducer` is a function to be passed to `map_reduce`
*
* `key` contains the string key to be aggregated.
* `lst` is a list of pairs. All keys in this list is the same as `key`.
*
* `reducer` adds up the numbers in the `value` field of the pairs and
*
*/
void reducer(char *key, kvlist_t *lst, kvlist_t *output) {
int sum = 0;
// iterate through lst
kvlist_iterator_t *itor = kvlist_iterator_new(lst);
for (;;) {
kvpair_t *pair = kvlist_iterator_next(itor);
if (pair == NULL) {
break;
}
// turn the value into an integer and increment the sum
sum += atoi(pair->value);
}
kvlist_iterator_free(&itor);
// turn `sum` back to a string and add to output
char sum_string[32];
sprintf(sum_string, "%d", sum);
kvlist_append(output, kvpair_new(key, sum_string));
}
int main(int argc, char **argv) {
// parse arguments
if (argc < 4) {
fprintf(stderr, "Usage: %s num_mapper num_reducer file...\n", argv[0]);
return 1;
}
int num_mapper = strtol(argv[1], NULL, 10);
int num_reducer = strtol(argv[2], NULL, 10);
if (num_mapper <= 0 || num_reducer <= 0) {
fprintf(stderr, "num_mapper and num_reducer must be positive\n");
return 1;
}
// input is a list of words
kvlist_t *input = kvlist_new();
// for each file
for (int i = 3; i < argc; i++) {
char *filename = argv[i];
// open the file
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
kvlist_free(&input);
err(1, "%s", filename);
}
// variables required for reading the file by line
char *line = NULL;
size_t len = 0;
ssize_t read;
// read the file by line
while ((read = getline(&line, &len, fp)) != -1) {
kvlist_append(input, kvpair_new(filename, line));
}
// cleanup
fclose(fp);
if (line) {
free(line);
}
line = NULL;
}
// create output list
kvlist_t *output = kvlist_new();
// call map_reduce
map_reduce(mapper, num_mapper, reducer, num_reducer, input, output);
// print out the result
kvlist_print(1, output);
// cleanup
kvlist_free(&input);
kvlist_free(&output);
return 0;
}