1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
# split.awk --- do split in awk
#
# Requires getopt() library function.
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Revised slightly, May 2014
# Rewritten September 2020
function usage()
{
    # Emit the two-line synopsis on standard error, then terminate the
    # program with a failing exit status.
    print "usage: split [-l count] [-a suffix-len] [file [outname]]" > "/dev/stderr"
    print " split [-b N[k|m]] [-a suffix-len] [file [outname]]" > "/dev/stderr"

    exit 1
}
BEGIN {
    # Defaults; parse_arguments() may override any of them.
    Outfile = "x"           # prefix of every output file name
    Suffix_length = 2       # number of letters in the generated suffix
    Byte_count = 0          # 0 means "not splitting by bytes"
    Line_count = 1000       # lines per output file when splitting by lines

    # Process the command line first: init_suffix_data() sizes its
    # table from Suffix_length, which an option may have changed.
    parse_arguments()
    init_suffix_data()

    # Name of the first output file: prefix followed by first suffix.
    Output = (Outfile compute_suffix())
}
# parse_arguments --- process command-line options and operands
#
# Options handled (via the external getopt() library function, which is
# not defined in this file and is used here as returning the option
# letter, or -1 when options are exhausted, with Optarg/Optind set):
#   -a N        set Suffix_length
#   -b N[k|m]   split by bytes; sets Byte_count and clears Line_count
#   -l N        split by lines; sets Line_count and clears Byte_count
# Any other option letter, or a non-positive byte/line count, prints the
# usage message and exits.
#
# Operands: an optional input file name (left in ARGV so awk reads it)
# followed by an optional output prefix, which is stored in Outfile and
# removed from ARGV.  Anything after that is a usage error.
#
# Locals: i (loop index), c (option letter), l (length of Optarg),
# modifier (last character of the -b argument).
function parse_arguments(   i, c, l, modifier)
{
    while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) {
        if (c == "a")
            Suffix_length = Optarg + 0
        else if (c == "b") {
            Byte_count = Optarg + 0
            Line_count = 0

            # A trailing k or m scales the count to kilo- or megabytes
            l = length(Optarg)
            modifier = substr(Optarg, l, 1)
            if (modifier == "k")
                Byte_count *= 1024
            else if (modifier == "m")
                Byte_count *= 1024 * 1024

            # A zero, negative, or non-numeric size would leave both
            # Byte_count and Line_count at 0, so no rule would fire and
            # the input would be discarded silently.  Reject it instead.
            Byte_count = int(Byte_count)
            if (Byte_count < 1)
                usage()
        } else if (c == "l") {
            Line_count = int(Optarg + 0)
            Byte_count = 0

            # Same reasoning as for -b: a count below 1 cannot be honored
            if (Line_count < 1)
                usage()
        } else
            usage()
    }

    # Clear out options so awk does not treat them as input files
    for (i = 1; i < Optind; i++)
        ARGV[i] = ""

    # Check for filename.  Compare against the empty string rather than
    # testing truthiness: an operand that looks like the number 0 would
    # otherwise be treated as absent.
    if (Optind < ARGC && ARGV[Optind] != "") {
        Optind++

        # Check for different prefix
        if (Optind < ARGC && ARGV[Optind] != "") {
            Outfile = ARGV[Optind]
            ARGV[Optind] = ""

            # Nothing may follow the output prefix
            if (++Optind < ARGC)
                usage()
        }
    }
}
# compute_suffix --- return the suffix for the next output file
#
# Reads and updates the globals Suffix_ind (one 1-based letter index per
# suffix position), Suffix_length and Reached_last.  Returns the suffix
# described by Suffix_ind as it stood on entry, then advances Suffix_ind
# to the following suffix.  Once the final suffix has been handed out,
# the next call prints an error and exits with status 1.
function compute_suffix(    pos, suffix, alphabet)
{
    # The previous call already returned the last possible suffix.
    if (Reached_last) {
        printf("split: too many files!\n") > "/dev/stderr"
        exit 1
    }

    # Remember that the suffix about to be returned is the last one,
    # so that the wrap-around performed below is caught next time.
    if (on_last_file())
        Reached_last = 1

    # Spell out the current suffix, one letter per position.
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    suffix = ""
    for (pos = 1; pos <= Suffix_length; pos++)
        suffix = suffix substr(alphabet, Suffix_ind[pos], 1)

    # Advance like an odometer: bump the rightmost position and carry
    # leftwards for as long as a position rolls over past 'z'.
    pos = Suffix_length
    while (pos >= 1) {
        if (++Suffix_ind[pos] <= 26)
            break
        Suffix_ind[pos] = 1
        pos--
    }

    return suffix
}
# init_suffix_data --- reset the suffix generator to its first value
#
# Sets Suffix_ind[1..Suffix_length] to 1 (the first letter) and clears
# the Reached_last flag.
function init_suffix_data(    pos)
{
    Reached_last = 0

    pos = 1
    while (pos <= Suffix_length) {
        Suffix_ind[pos] = 1
        pos++
    }
}
# on_last_file --- report whether Suffix_ind describes the final suffix
#
# Returns 1 when every one of the Suffix_length positions in Suffix_ind
# holds 26 (the last letter), 0 otherwise.  With a Suffix_length below 1
# there is nothing to check and the result is 1.
function on_last_file(    pos)
{
    for (pos = 1; pos <= Suffix_length; pos++) {
        # A single position short of 26 settles the answer
        if (Suffix_ind[pos] != 26)
            return 0
    }

    return 1
}
# Splitting by lines: runs for every record while Line_count is positive.
# tcount holds the number of records written to the current output file.
Line_count > 0 {
    tcount++

    if (tcount > Line_count) {
        # Current file is full: finish it and start the next one, with
        # this record counted as the first line of the new file.
        close(Output)
        Output = (Outfile compute_suffix())
        tcount = 1
    }

    print $0 > Output
}
# Splitting by size: runs for every record while Byte_count is positive.
# tcount holds the amount already written to the current output file;
# each record contributes length($0) plus one for its newline.
#
# NOTE(review): length() and substr() work in characters, so the pieces
# are byte-exact only when every character is a single byte -- confirm
# the locale this is run under.
Byte_count > 0 {
    # `+ 1' is for the final newline.
    # Loop rather than test once: a record longer than Byte_count has to
    # be cut into several pieces, otherwise everything after the first
    # cut would land in a single oversized file.
    while (tcount + length($0) + 1 > Byte_count) {  # would overflow
        # compute leading bytes: the room left in the current file
        leading_bytes = Byte_count - tcount

        # write leading bytes (no newline: the record continues)
        printf("%s", substr($0, 1, leading_bytes)) > Output

        # close old file, open new file
        close(Output)
        Output = (Outfile compute_suffix())

        # set up first bytes for new file
        $0 = substr($0, leading_bytes + 1)  # trailing bytes
        tcount = 0

        # A piece size below one can never hold even the newline, so
        # the loop condition would stay true forever; stop after one cut.
        if (Byte_count < 1)
            break
    }

    # write full record or trailing bytes
    tcount += length($0) + 1
    print > Output
}
# After the last record (or after an exit), close the output file that
# is currently open.
END {
    close(Output)
}
|