Skip to content

Commit 95e4fd4

Browse files
committed
Added a script called nrofftomdconversion.py that will convert nroff files to md files with only occasional hiccups. Note: need to install pandoc to run the script
Signed-off-by: Colton Kammes <[email protected]>
1 parent ee3bd58 commit 95e4fd4

File tree

1 file changed

+333
-0
lines changed

1 file changed

+333
-0
lines changed

nrofftomdconversion.py

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
import os # os.system(command)
2+
import argparse
3+
import re
4+
5+
# Every path used below is relative to the "man3" directory, so the script
# has to be started from the directory that contains it.
os.chdir("man3")

# Selects the link target written by addLink():
#   True  -> "./?file=<name>.md"
#   False -> "<name>.html"
newLinks = True

# Command line: optional "-f <file>" naming a single .3in file; the value is
# stored as args.file.
parser = argparse.ArgumentParser()
parser.add_argument('-f', dest='file', type=str, help='.3in file to convert to markdown')
args = parser.parse_args()
13+
14+
# writes the lines list to the file
15+
def writeLines(lines, filename):
    """Write every string in ``lines`` to ``filename``, replacing its contents.

    The strings are written back to back; no separator or newline is added
    between them.
    """
    with open(filename, "w") as out:
        out.writelines(lines)
27+
28+
# helper for adjustMarkdown
29+
def allUpper(line):
    """Return True when every alphabetic character in ``line`` is upper case.

    Trailing whitespace is ignored, and a line that is empty after stripping
    is never considered upper case.  Non-alphabetic characters (such as '-'
    or digits) are skipped, so a non-empty line without any letters also
    counts as upper case.
    """
    stripped = line.rstrip()
    if not stripped:
        return False
    # Titles may contain punctuation, so only the letters are inspected.
    return all(ch.isupper() for ch in stripped if ch.isalpha())
46+
47+
# figure out what text to add to links in the see also section
48+
def addLink(mpiFile):
    """Build the markdown link line for one "see also" entry.

    Trailing whitespace, all spaces and all backslashes are removed from
    ``mpiFile`` before it is used as both the link text and the link target.

    Depending on the module-level ``newLinks`` flag the result is either
    "[`NAME(3)`](./?file=NAME.md)" or "[`NAME(3)`](NAME.html)", followed by
    a newline.
    """
    name = mpiFile.rstrip().replace(" ", "").replace("\\", "")
    if newLinks:
        template = "[`{}(3)`](./?file={}.md)\n"
    else:
        template = "[`{}(3)`]({}.html)\n"
    return template.format(name, name)
70+
71+
# helper for adjustMarkdown
72+
def startOfCodeBlock(line):
    """Return the opening markdown code fence for the heading in ``line``.

    A heading containing a capital 'C' yields a C fence, one containing
    'Fortran' yields a Fortran fence.  Any other heading yields a plain
    fence without a language tag, so the caller always receives a string it
    can append to the output (previously this case returned None, which
    later failed when the output lines were written).
    """
    if 'C' in line:
        return "```c\n"
    if 'Fortran' in line:
        return "```fortran\n"
    # Unknown language: still open a fence so the closing "```" emitted by
    # the caller stays balanced.
    return "```\n"
77+
78+
#Add appropriate `` around function names and parameters
79+
def adjustWords(words):
    """Wrap function names and parameters in backticks and join the words.

    ``words`` is a list of strings (a line split on single spaces); it is
    updated in place and the words joined by single spaces are returned.

    For each word:
      * a word containing '_' is treated as a function name and wrapped in
        backticks; one trailing non-alphanumeric character (punctuation or
        a newline) is moved outside the backticks;
      * a word that starts and ends with '*' is treated as a parameter: the
        asterisks become backticks, and a non-alphanumeric character just
        before the closing '*' is moved behind the closing backtick;
      * otherwise any backslashes are removed.

    Empty words (produced by consecutive spaces) and a lone '*' are left
    unchanged instead of raising IndexError.
    """
    for index, word in enumerate(words):
        if not word:
            # split(' ') yields '' for doubled/leading spaces; keep it so the
            # original spacing survives the join below.
            continue

        last_mark = ''
        # check function names
        if '_' in word:
            # Move the punctuation out of ``
            if not word[-1].isalnum():
                last_mark = word[-1]
                word = word[:-1]
            word = '`{}`'.format(word)
        # check parameters (needs at least the two surrounding asterisks)
        elif len(word) >= 2 and word[0] == '*' and word[-1] == '*':
            if not word[-2].isalnum():
                last_mark = word[-2]
                word = word[:-2] + word[-1:]
            word = word.replace('*', '`')
        # Delete unnecessary escape signs
        elif '\\' in word:
            word = word.replace('\\', '')

        words[index] = word + last_mark

    return ' '.join(words)
101+
102+
# adds newline inside the code block if necessary
103+
def checkBreak(line):
    """Return ``line``, prefixed with a newline when it starts a new code group.

    A blank line is inserted before:
      * a C include ("#include"),
      * a Fortran module use ("USE MPI" or "USE mpi_f08"),
      * a C function declaration (" MPI_" preceded by a space),
      * a Fortran routine line ("MPI_" present and no ':' in the line).
    Every other line is returned unchanged.
    """
    starters = ("#include", "USE MPI", "USE mpi_f08", " MPI_")
    needsBreak = any(marker in line for marker in starters) or (
        "MPI_" in line and ':' not in line
    )
    return "\n" + line if needsBreak else line
125+
126+
# reads a markdown file and calls helper function processLine to process the markdown file further
127+
def adjustMarkdown(filename):
    """Post-process the markdown file ``filename`` and return the new lines.

    The file is read completely; its first line is never emitted on its own
    (processing starts at the second line).  Each remaining line is
    classified as a title underline ("===="), an indented block line, or a
    non-indented line, and is rewritten with the helpers ``allUpper``,
    ``startOfCodeBlock``, ``checkBreak``, ``adjustWords`` and ``addLink``.

    Returns:
        list of str: the lines of the adjusted document, ready to be written
        back with ``writeLines``.
    """
    with open(filename, "r") as fh:
        workingLines = fh.readlines()

    newLines = []
    # parameter names seen so far; they are rendered fixed-width in later text
    fixedWidthWords = []

    inCodeBlock = False
    addText = False
    # True only while the current line is a "name : description" parameter line
    parameterLine = False
    # Normal text section includes all sections except for parameterLine and Syntax
    normalText = False
    seeAlso = False

    for i in range(1, len(workingLines)):
        current = workingLines[i]
        line = ""

        # delete unnecessary blank lines
        if current.isspace():
            continue

        # titles: the "====" row underlines the title text on the previous line
        elif "====" in current:
            previous = workingLines[i - 1]

            # a new title always ends an open code block
            if inCodeBlock:
                newLines.append("```\n")
                newLines.append('\n')
                inCodeBlock = False

            addText = False

            # if all caps, then heading 1
            if allUpper(previous):
                if "SEE ALSO" in previous:
                    seeAlso = True
                # add a blank line before every title except the first (NAME)
                if previous != "NAME\n":
                    line += '\n'
                line += '# ' + previous.title() + '\n'

                # Mark that this is a normal section
                if 'Syntax' not in line and 'Parameter' not in line:
                    normalText = True
                else:
                    normalText = False
            # else, heading 2
            else:
                line = '## ' + previous.title() + '\n'

        # indented blocks: a four-space indent, which is sliced off below
        elif "    " in current and not normalText:
            # start code block
            inCodeBlock = True
            if len(newLines) > 1:
                if "##" in newLines[-1]:
                    # first line after a sub-heading: open the fence first
                    newLines.append(startOfCodeBlock(newLines[-1]))
                    line = current[4:]
                else:
                    line = checkBreak(current[4:])
                    # When changing a new line in a code block, use six
                    # spaces instead of a tab
                    if line[0] == '\t':
                        line = '      ' + line[1:]
            else:
                # NOTE(review): looks like a leftover debugging marker for an
                # indented line that appears before any heading - confirm
                # whether it should still be written to the output.
                print("HERE")
                line = "-----------HERE----------------"

        # non-indented blocks
        # check to make sure not going out of bounds
        elif i + 2 < len(workingLines):
            # get name at beginning: drop the bold markers
            if "**" in current:
                line = current.replace("*", "")

            # handle ':' sections: term on this line, ":   text" two lines on
            elif workingLines[i + 2][0] == ':':
                parameterLine = True
                # ticks are added here, so they will not be added later
                line += '* `' + current.rstrip() + '`' + ' : '
                line += workingLines[i + 2][4:]
                # add word to go through other lines and syntax highlight later
                fixedWidthWords.append(current.rstrip())

            # text blocks below description and errors
            elif len(newLines) > 2:
                # If the text is not in a parameter or syntax section, add text
                if normalText:
                    addText = True

                # filter headers and blank lines
                if addText and not allUpper(current):
                    line = current
                    # A line ending in a backslash (hard line break) is
                    # treated as a see-also entry.  Only the flag is set
                    # here; the link text itself is built in one place near
                    # the end of the loop, so a name is never linked twice.
                    if current.endswith('\\\n'):
                        seeAlso = True

            # if a normal text is under name section, also add it to newLines
            elif normalText and not current.isupper():
                line = current

        else:
            line = current

        # adjust words for each line
        try:
            # make sure not in a code block
            if not inCodeBlock and not parameterLine and not seeAlso:
                line = adjustWords(line.split(' '))
        except IndexError:
            # adjustWords could not index one of the words (for example an
            # empty word); leave the line as it is
            pass

        # make things in fixedWidthWords fixed-width font if needed
        if not inCodeBlock and not parameterLine and not seeAlso:
            for word in fixedWidthWords:
                # Only wrap the word when it stands alone between spaces, to
                # prevent things like `comm`unicator.
                standalone = r'(?<= )' + re.escape(word) + r'(?= )'
                line = re.sub(standalone,
                              lambda match: '`' + match.group(0) + '`',
                              line)

        # replace any remaining tabs with spaces
        if "\t" in line:
            line = line.replace("\t", " ")

        # remove any unwanted backslashes
        if "\\" in line:
            line = line.replace("\\", "")

        # get rid of all * characters that aren't required <- doesn't work if
        # there are code blocks in the description
        if not inCodeBlock and not parameterLine and "*" in line:
            line = line.replace("*", "")

        # see-also entries become links, one link per line
        if seeAlso and "MPI_" in current:
            # digits are part of names such as MPI_Comm_f2c
            names = re.findall(r'MPI_[A-Za-z0-9_]+', line)
            if len(re.findall('MPI_', line)) > 1 and names:
                # several names on one line: emit them in source order; the
                # last one goes through the normal append below
                for linkName in names[:-1]:
                    newLines.append(addLink(linkName))
                line = addLink(names[-1])
            else:
                line = addLink(current)

        # finally, add line
        if line:
            newLines.append(line)

        # at the end of the line, reset the line tag for the next iteration
        parameterLine = False

    return newLines
307+
308+
def runPandoc(file):
    """Run pandoc to convert the man-format ``file`` into standalone markdown.

    The output path is ``file`` with its last three characters replaced by
    "md" (so "name.3in" becomes "name.md").  pandoc's exit status is not
    checked, and a failure to start pandoc is reported on stdout rather
    than raised.
    """
    import subprocess  # local import: only this function needs it

    outFile = file[:-3] + "md"
    # An argument list (no shell) passes file names containing spaces or
    # shell metacharacters to pandoc unchanged.
    command = ["pandoc", file, "-f", "man", "-t", "markdown", "-s", "-o", outFile]
    try:
        subprocess.run(command)
    except OSError as err:
        # e.g. pandoc is not installed
        print("Could not run pandoc:", err)
312+
313+
314+
def convert(nroffFilename):
    """Convert one .3in nroff file to markdown.

    Runs pandoc on ``nroffFilename``, then adjusts the markdown file pandoc
    produced and writes the adjusted lines back to that same file.  The
    markdown name is ``nroffFilename`` with its last three characters
    replaced by "md".
    """
    target = nroffFilename[:-3] + "md"
    runPandoc(nroffFilename)
    writeLines(adjustMarkdown(target), target)
323+
324+
def convertAll():
    """Convert every ``.3in`` file in the current working directory.

    A failure on one file is reported on stdout together with its cause and
    does not stop the remaining files from being converted.
    """
    for filename in os.listdir():
        # Match on the extension only: convert() derives the output name by
        # replacing the last three characters, which is only correct for
        # names that really end in ".3in".
        if not filename.endswith(".3in"):
            continue
        try:
            convert(filename)
        except Exception as err:
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # still stop the run.
            print("Couldn't convert", filename, "-", err)
331+
332+
# Convert only the file named with -f; without -f convert every .3in file
# in the working directory.
if args.file:
    convert(args.file)
else:
    convertAll()

0 commit comments

Comments
 (0)