Intro
This time, I will try getting text styles and fonts.
Getting specified styles and fonts
First, I will try getting the text styles and fonts that I specified explicitly myself.
DocFileReader.cs
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
namespace OfficeFileAccessor.OfficeFiles.Readers;
public class DocFileReader : IOfficeFileReader
{
private readonly NLog.Logger logger;
/// <summary>
/// Creates the reader and obtains the NLog logger for the current class.
/// </summary>
public DocFileReader()
    => logger = NLog.LogManager.GetCurrentClassLogger();
/// <summary>
/// Opens the uploaded file as a Word document and, for every non-empty paragraph
/// directly under the document body, logs its text and the font information of its runs.
/// </summary>
/// <param name="file">Uploaded file whose read stream is opened as a <see cref="WordprocessingDocument"/>.</param>
public void Read(IFormFile file)
{
// The second argument (false) opens the document without edit access.
// NOTE(review): the stream returned by file.OpenReadStream() is not held in a variable
// or disposed explicitly here - confirm that disposing wordDoc is sufficient.
using WordprocessingDocument wordDoc = WordprocessingDocument.Open(file.OpenReadStream(), false);
Body? body = wordDoc.MainDocumentPart?.Document?.Body;
if (body == null)
{
// Main part, document or body is missing: nothing to read.
logger.Warn("Failed reading the document");
return;
}
// Only the direct children of the body are visited.
foreach (OpenXmlElement elm in body.Elements())
{
if (elm is Table table)
{
...
}
else if (elm is Paragraph paragraph)
{
// Skip paragraphs that contain only whitespace (or no text at all).
if (elm.InnerText.Trim().Length <= 0)
{
continue;
}
// Get full text from paragraph.InnerText
logger.Info($"Paragraph Text: {paragraph.InnerText}");
PrintFontInfoFromParagraph(wordDoc.MainDocumentPart, paragraph);
}
}
}
/// <summary>
/// Logs the text of every run directly under the paragraph together with the font,
/// color, bold and font-size settings declared on that run's own RunProperties.
/// Values that are only inherited from a style or theme are not resolved here.
/// </summary>
/// <param name="mainPart">Main document part; forwarded to <c>GetFontName</c>.</param>
/// <param name="paragraph">Paragraph whose direct child runs are inspected.</param>
private void PrintFontInfoFromParagraph(MainDocumentPart? mainPart, Paragraph paragraph)
{
    // One paragraph is separated as multiple Run elements by styles and fonts
    foreach (Run run in paragraph.Elements<Run>())
    {
        logger.Info($"Run Text: {run.InnerText}");
        // Get text style and font from RunProperties.
        RunProperties? runProperties = run.RunProperties;
        if (runProperties != null)
        {
            logger.Info($"RunProperties found:");
            var fonts = runProperties.RunFonts;
            if (fonts != null)
            {
                logger.Info($"Font Name: {GetFontName(fonts, mainPart)}");
            }
            if (runProperties.Color != null)
            {
                logger.Info($"Color: {runProperties.Color.Val}");
            }
            if (runProperties.Bold != null)
            {
                // Val can be empty when the element carries no explicit value,
                // in which case the log shows "Bold:" with nothing after it.
                logger.Info($"Bold: {runProperties.Bold.Val}");
            }
            if (runProperties.FontSize == null)
            {
                logger.Info($"FontSize was null");
            }
            else if (int.TryParse(runProperties.FontSize.Val, out var halfPoints))
            {
                // runProperties.FontSize.Val represents half-points.
                // Divide as floating point so that odd values keep their half point
                // (21 half-points -> 10.5pt instead of being truncated to 10).
                logger.Info($"FontSize: {halfPoints / 2.0}");
            }
        }
        logger.Info("------------");
    }
}
/// <summary>
/// Returns the first font attribute that is present on the RunFonts element,
/// checked in the order Ascii, HighAnsi, EastAsia, ComplexScript.
/// </summary>
/// <param name="runFonts">Font settings of a run; may be null.</param>
/// <param name="mainPart">Not used by this method.</param>
/// <returns>The font name, or "No font set" when no non-empty name is found.</returns>
private string GetFontName(RunFonts? runFonts, MainDocumentPart? mainPart)
{
    if (runFonts is null)
    {
        return "No font set";
    }

    // The first attribute that exists wins; it is converted to its string value on assignment.
    string? fontName = runFonts.Ascii
        ?? runFonts.HighAnsi
        ?? runFonts.EastAsia
        ?? runFonts.ComplexScript;

    return string.IsNullOrEmpty(fontName) ? "No font set" : fontName;
}
}
Result
Paragraph Text: カスタムfontに設定した場合
Run Text: カス
RunProperties found:
Font Name: No font set
FontSize was null
------------
Run Text: タムfo
RunProperties found:
Font Name: Noto Sans JP Black
FontSize was null
------------
Run Text: ntに
RunProperties found:
Font Name: No font set
FontSize was null
------------
Run Text: 設定
RunProperties found:
Font Name: No font set
Bold:
FontSize was null
------------
Run Text: した
RunProperties found:
Font Name: Meiryo UI
FontSize: 16
------------
Run Text: 場合
RunProperties found:
Font Name: No font set
Color: 60CAF3
FontSize: 22
------------
Getting default styles and fonts
Unless I change the font, color, size, etc. myself, the above code can't get that information.
So I have to get it from the base style or the theme fonts.
I can get the style information of "見出し(Headline)".
But some styles like "標準(Normal)" don't have style information, so I use default values if I can't get "ParagraphStyleId" from the paragraph.
DocFileReader.cs
...
public class DocFileReader : IOfficeFileReader
{
private readonly NLog.Logger logger;
/// <summary>
/// Identifies which font slot a font name was taken from.
/// Ascii, HighAnsi and EastAsia are filled from RunFonts attributes;
/// Latin and EastAsia are also used for fonts taken from the theme.
/// </summary>
private enum FontType
{
Ascii = 0,
HighAnsi,
EastAsia,
Latin,
}
/// <summary>
/// Distinguishes major and minor theme fonts.
/// NOTE(review): not referenced anywhere in the visible part of this file.
/// </summary>
private enum FontPriority {
Major = 0,
Minor
}
/// <summary>
/// Typeface names read from the theme's font scheme; each member is null when
/// the corresponding entry is not available.
/// </summary>
private record ThemeFont(string? EastAsiaMajorFont, string? EastAsiaMinorFont, string? LatinMajorFont, string? LatinMinorFont);
/// <summary>
/// A font name together with the slot it belongs to.
/// </summary>
private record TextFont (FontType FontType, string FontName);
/// <summary>
/// Text properties resolved for a paragraph. The initial values below act as the
/// defaults whenever a style does not specify the corresponding setting.
/// </summary>
private class TextProps
{
// Fonts that apply to the text; empty until filled in.
public List<TextFont> Fonts { get; set; } = [];
// Font size; defaults to 11 (callers store the half-point value divided by two).
public int FontSize { get; set; } = 11;
// Whether the text is bold; defaults to false.
public bool Bold { get; set; } = false;
// Text color string; defaults to "000000".
public string Color { get; set; } = "000000";
}
...
/// <summary>
/// Opens the uploaded file as a Word document, reads the theme fonts once, and logs the
/// text and resolved font information of every paragraph directly under the document body.
/// </summary>
/// <param name="file">Uploaded file whose read stream is opened as a <see cref="WordprocessingDocument"/>.</param>
public void Read(IFormFile file)
{
// The second argument (false) opens the document without edit access.
using WordprocessingDocument wordDoc = WordprocessingDocument.Open(file.OpenReadStream(), false);
Body? body = wordDoc.MainDocumentPart?.Document?.Body;
...
// Theme fonts are the same for the whole document, so they are read once up front.
ThemeFont themeFont = GetThemeFont(wordDoc.MainDocumentPart);
// Only the direct children of the body are visited.
foreach (OpenXmlElement elm in body.Elements())
{
if (elm is Table table)
{
...
}
else if (elm is Paragraph paragraph)
{
// Get full text from paragraph.InnerText
logger.Info($"Paragraph Text: {paragraph.InnerText}");
PrintFontInfoFromParagraph(wordDoc.MainDocumentPart, paragraph, themeFont);
}
}
}
/// <summary>
/// Reads the major/minor Latin and East Asian typeface names from the document theme.
/// </summary>
/// <param name="mainPart">Main document part whose theme part is inspected; may be null.</param>
/// <returns>
/// A <c>ThemeFont</c> whose members are null for every typeface that is not available
/// (no theme part, no theme root, no theme elements, no font scheme, or no such font entry).
/// </returns>
private ThemeFont GetThemeFont(MainDocumentPart? mainPart)
{
    // Null-conditional access all the way down, so a theme part without a Theme root
    // yields an all-null result instead of a NullReferenceException.
    var fontScheme = mainPart?.ThemePart?.Theme?.ThemeElements?.FontScheme;
    var majorFont = fontScheme?.MajorFont;
    var minorFont = fontScheme?.MinorFont;

    // Every missing level simply produces null for the affected typeface.
    return new ThemeFont(
        EastAsiaMajorFont: majorFont?.EastAsianFont?.Typeface,
        EastAsiaMinorFont: minorFont?.EastAsianFont?.Typeface,
        LatinMajorFont: majorFont?.LatinFont?.Typeface,
        LatinMinorFont: minorFont?.LatinFont?.Typeface);
}
/// <summary>
/// Logs, for every run directly under the paragraph, its text and its effective font,
/// color, bold and font-size settings. A setting declared on the run itself wins;
/// otherwise the value resolved for the paragraph (style, base style or defaults) is used.
/// </summary>
/// <param name="mainPart">Main document part used to look up styles.</param>
/// <param name="paragraph">Paragraph whose direct child runs are inspected.</param>
/// <param name="themeFont">Theme fonts used when a style refers to the theme or has no fonts.</param>
private void PrintFontInfoFromParagraph(MainDocumentPart? mainPart, Paragraph paragraph, ThemeFont themeFont)
{
    TextProps? props = GetTextProps(mainPart, paragraph, themeFont);
    // One paragraph is separated as multiple Run elements by styles and font types
    foreach (Run run in paragraph.Elements<Run>())
    {
        logger.Info($"Run Text: {run.InnerText}");
        RunProperties? runProperties = run.RunProperties;
        if (runProperties == null)
        {
            // A run without its own properties still has effective values:
            // fall through so the paragraph-level values are reported below.
            logger.Info("runProperties was null");
        }

        // Fonts: the run's own fonts first, otherwise the paragraph-level fonts.
        List<TextFont> fonts = GetFonts(runProperties?.RunFonts);
        if (fonts.Count <= 0 && props?.Fonts != null)
        {
            fonts = props.Fonts;
        }
        foreach (var f in fonts)
        {
            logger.Info($"Font Name: {f.FontName} Type: {f.FontType}");
        }

        // Color
        var runColor = runProperties?.Color;
        if (runColor != null)
        {
            logger.Info($"Color: {runColor.Val}");
        }
        else if (props != null)
        {
            logger.Info($"Color: {props.Color}");
        }

        // Bold: a bold element without an explicit value means "on",
        // so a missing Val is reported as True rather than as an empty string.
        var runBold = runProperties?.Bold;
        if (runBold != null)
        {
            logger.Info($"Bold: {runBold.Val?.Value ?? true}");
        }
        else if (props != null)
        {
            logger.Info($"Bold: {props.Bold}");
        }

        // Font size
        var runFontSize = runProperties?.FontSize;
        if (runFontSize == null)
        {
            if (props != null)
            {
                logger.Info($"FontSize: {props.FontSize}");
            }
        }
        else if (int.TryParse(runFontSize.Val, out var halfPoints))
        {
            // FontSize.Val represents half-points. Divide as floating point so that
            // odd values keep their half point (21 -> 10.5 instead of 10).
            logger.Info($"FontSize: {halfPoints / 2.0}");
        }

        logger.Info("------------");
    }
}
/// <summary>
/// Resolves the text properties that apply to a paragraph through its paragraph style.
/// </summary>
/// <param name="mainPart">Main document part used to look up styles.</param>
/// <param name="paragraph">Paragraph whose ParagraphStyleId is resolved.</param>
/// <param name="themeFont">Theme fonts used for theme references and for the defaults.</param>
/// <returns>
/// The style's own properties when they contain at least one font; otherwise the
/// properties of the nearest base style that has run properties; otherwise the defaults.
/// </returns>
private TextProps? GetTextProps(MainDocumentPart? mainPart, Paragraph paragraph, ThemeFont themeFont)
{
    Style? paragraphStyle = GetStyleById(
        mainPart,
        paragraph.ParagraphProperties?.ParagraphStyleId?.Val?.Value);
    if (paragraphStyle is null)
    {
        // No style id on the paragraph, or the id is not defined in the document.
        return GenerateDefaultProps(themeFont);
    }

    TextProps? ownProps = GetTextPropsFromRunProperties(paragraphStyle.StyleRunProperties, themeFont);
    if (ownProps is { Fonts.Count: > 0 })
    {
        return ownProps;
    }

    // The style itself yields no fonts: look at the styles it is based on.
    StyleRunProperties? baseRunProperties = GetInheritedRunProperties(paragraphStyle, mainPart);
    if (baseRunProperties is null)
    {
        return GenerateDefaultProps(themeFont);
    }

    logger.Info("Inherited from Base Style:");
    return GetTextPropsFromRunProperties(baseRunProperties, themeFont);
}
/// <summary>
/// Walks the BasedOn chain of a style and returns the run properties of the nearest
/// base style that declares any.
/// </summary>
/// <param name="style">Style whose base styles are searched (the style itself is not checked).</param>
/// <param name="mainPart">Main document part that holds the style definitions; may be null.</param>
/// <returns>
/// The first StyleRunProperties found on a base style, or null when the chain ends,
/// a referenced base style does not exist, or the chain loops back on itself.
/// </returns>
private static StyleRunProperties? GetInheritedRunProperties(Style style, MainDocumentPart? mainPart)
{
    // The document is supplied by the caller, so a BasedOn chain that refers back to an
    // earlier style must not be followed forever. Remember every id already visited.
    HashSet<string> visitedStyleIds = new();
    Style current = style;

    while (true)
    {
        string? baseStyleId = current.BasedOn?.Val?.Value;
        if (string.IsNullOrEmpty(baseStyleId) || !visitedStyleIds.Add(baseStyleId))
        {
            // End of the chain, or a cycle was detected.
            return null;
        }

        Style? baseStyle = mainPart?.StyleDefinitionsPart?.Styles?.Elements<Style>()
            .FirstOrDefault(s => s.StyleId == baseStyleId);
        if (baseStyle == null)
        {
            return null;
        }
        if (baseStyle.StyleRunProperties != null)
        {
            return baseStyle.StyleRunProperties;
        }

        // This base style has no run properties of its own: continue with its base.
        current = baseStyle;
    }
}
/// <summary>
/// Collects the non-empty Ascii, HighAnsi and EastAsia font names of a RunFonts element.
/// </summary>
/// <param name="runFonts">Font settings of a run; may be null.</param>
/// <returns>One entry per non-empty font name, in the order Ascii, HighAnsi, EastAsia; empty when none is set.</returns>
private static List<TextFont> GetFonts(RunFonts? runFonts)
{
    List<TextFont> fonts = [];

    if (runFonts?.Ascii?.Value is { Length: > 0 } ascii)
    {
        fonts.Add(new TextFont(FontType.Ascii, ascii));
    }
    if (runFonts?.HighAnsi?.Value is { Length: > 0 } highAnsi)
    {
        fonts.Add(new TextFont(FontType.HighAnsi, highAnsi));
    }
    if (runFonts?.EastAsia?.Value is { Length: > 0 } eastAsia)
    {
        fonts.Add(new TextFont(FontType.EastAsia, eastAsia));
    }

    return fonts;
}
/// <summary>
/// Builds the fallback text properties used when no style information can be obtained:
/// the TextProps defaults plus the theme's minor Latin and East Asian fonts, when set.
/// </summary>
/// <param name="themeFont">Theme fonts of the document.</param>
/// <returns>A new TextProps holding the default values and up to two theme fonts.</returns>
private static TextProps GenerateDefaultProps(ThemeFont themeFont)
{
    TextProps defaults = new();

    if (!string.IsNullOrEmpty(themeFont.LatinMinorFont))
    {
        defaults.Fonts.Add(new TextFont(FontType.Latin, themeFont.LatinMinorFont));
    }
    if (!string.IsNullOrEmpty(themeFont.EastAsiaMinorFont))
    {
        defaults.Fonts.Add(new TextFont(FontType.EastAsia, themeFont.EastAsiaMinorFont));
    }

    return defaults;
}
/// <summary>
/// Looks up a style definition by its style id.
/// </summary>
/// <param name="mainPart">Main document part that holds the style definitions; may be null.</param>
/// <param name="styleId">Id of the style to find; may be null or empty.</param>
/// <returns>The first style with a matching id, or null when the id is null/empty or nothing matches.</returns>
private static Style? GetStyleById(MainDocumentPart? mainPart, string? styleId)
    => string.IsNullOrEmpty(styleId)
        ? null
        : mainPart?.StyleDefinitionsPart?.Styles?.Elements<Style>()
            .FirstOrDefault(s => s.StyleId == styleId);
/// <summary>
/// Converts the run properties of a style into a TextProps instance.
/// Settings the style does not declare keep the TextProps default values.
/// </summary>
/// <param name="runProperties">Run properties of a style; may be null.</param>
/// <param name="themeFont">Theme fonts used when the style names no explicit font.</param>
/// <returns>The resolved properties, or null when <paramref name="runProperties"/> is null.</returns>
private TextProps? GetTextPropsFromRunProperties(StyleRunProperties? runProperties, ThemeFont themeFont)
{
    if (runProperties == null)
    {
        return null;
    }

    TextProps result = new();

    var runFonts = runProperties.RunFonts;
    if (runFonts != null)
    {
        // Explicit font names first; fall back to the theme fonts when none is given.
        result.Fonts = GetTextFonts(runFonts);
        if (result.Fonts.Count <= 0)
        {
            result.Fonts = GetTextFonts(themeFont, runFonts);
        }
    }

    string? color = runProperties.Color?.Val?.Value;
    if (color != null)
    {
        result.Color = color;
    }

    if (runProperties.Bold != null)
    {
        // The presence of the element alone does not mean bold: an explicit value of
        // false turns bold off. Only a missing value counts as "on".
        result.Bold = runProperties.Bold.Val?.Value ?? true;
    }

    // runProperties.FontSize.Val represents half-points.
    // TryParse already rejects null/empty input, so no separate check is needed.
    // The result is stored as int, so an odd half-point value is rounded down.
    if (int.TryParse(runProperties.FontSize?.Val?.Value, out var halfPoints))
    {
        result.FontSize = halfPoints / 2;
    }

    return result;
}
/// <summary>
/// Collects the explicitly named Ascii, HighAnsi and EastAsia fonts of a RunFonts element.
/// </summary>
/// <param name="runFonts">Font settings to read.</param>
/// <returns>One entry per attribute that has a value, in the order Ascii, HighAnsi, EastAsia.</returns>
private static List<TextFont> GetTextFonts(RunFonts runFonts)
{
    List<TextFont> fonts = [];

    AddWhenPresent(FontType.Ascii, runFonts.Ascii);
    AddWhenPresent(FontType.HighAnsi, runFonts.HighAnsi);
    AddWhenPresent(FontType.EastAsia, runFonts.EastAsia);

    return fonts;

    // Adds the attribute's value under the given slot when the attribute exists and holds a value.
    void AddWhenPresent(FontType fontType, StringValue? attribute)
    {
        if (attribute is { HasValue: true, Value: { } fontName })
        {
            fonts.Add(new TextFont(fontType, fontName));
        }
    }
}
/// <summary>
/// Resolves fonts from the theme for a RunFonts element that names no explicit font.
/// The major theme fonts are used when EastAsiaTheme is MajorEastAsia; in every other
/// case the minor theme fonts are used.
/// </summary>
/// <param name="themeFont">Theme fonts of the document.</param>
/// <param name="runFonts">Font settings whose EastAsiaTheme attribute selects major or minor.</param>
/// <returns>The non-empty Latin and East Asian theme fonts of the selected group, Latin first.</returns>
private static List<TextFont> GetTextFonts(ThemeFont themeFont, RunFonts runFonts)
{
    // ThemeFont is divided into MajorFont and MinorFont.
    bool useMajor = runFonts.EastAsiaTheme?.Value == ThemeFontValues.MajorEastAsia;
    string? latinName = useMajor ? themeFont.LatinMajorFont : themeFont.LatinMinorFont;
    string? eastAsiaName = useMajor ? themeFont.EastAsiaMajorFont : themeFont.EastAsiaMinorFont;

    List<TextFont> fonts = [];
    if (!string.IsNullOrEmpty(latinName))
    {
        fonts.Add(new TextFont(FontType.Latin, latinName));
    }
    if (!string.IsNullOrEmpty(eastAsiaName))
    {
        fonts.Add(new TextFont(FontType.EastAsia, eastAsiaName));
    }

    return fonts;
}
}
Result
Found a Paragraph with text: This is みだし1
Paragraph Text: This is みだし1
Run Text: This is みだし1
Font Name: 游ゴシック Light Type: Latin
Color: 000000
Bold: False
FontSize: 16
------------
Found a Paragraph with text: あいう
Paragraph Text: あいう
Run Text: あいう
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
------------
Found a Paragraph with text: 見出し2
Paragraph Text: 見出し2
Run Text: 見出し2
Font Name: 游ゴシック Light Type: Latin
Color: 000000
Bold: False
FontSize: 14
------------
Found a Paragraph with text: えおか
Paragraph Text: えおか
Run Text: えおか
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
------------
Found a Paragraph with text: きくけ
Paragraph Text: きくけ
Run Text: きくけ
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
------------
Found a Paragraph with text: こさし
Paragraph Text: こさし
Run Text: こさし
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
------------
...
Top comments (0)