DEV Community

Masui Masanori
Masui Masanori

Posted on

[ASP.NET Core] Try reading a word processing file by OpenXML 1

Intro

This time, I will try reading a word processing file (an MS Word file) with the Open XML SDK.
I will use an MS Word template to check the result.

Image description

Environments

  • .NET ver.9.0.100
  • DocumentFormat.OpenXml ver.3.1.1
  • NLog.Web.AspNetCore ver.5.3.14
  • Microsoft.AspNetCore.SpaServices.Extensions ver.9.0.0

Reading

I can get a stream from the IFormFile that is sent from the client side and read it as a "WordprocessingDocument".

DocFileReader.cs

using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace OfficeFileAccessor.OfficeFiles.Readers;

/// <summary>
/// Reads an uploaded word processing file with the Open XML SDK
/// and writes every top-level body element to the log.
/// </summary>
public class DocFileReader: IOfficeFileReader
{
   private readonly NLog.Logger logger;
   public DocFileReader()
   {
      this.logger = NLog.LogManager.GetCurrentClassLogger();
   }
   /// <summary>
   /// Opens the uploaded file read-only and logs the type and inner XML
   /// of each direct child element of the document body.
   /// </summary>
   /// <param name="file">The uploaded file received from the client.</param>
   /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
   public void Read(IFormFile file)
   {
      ArgumentNullException.ThrowIfNull(file);
      // Dispose the upload stream explicitly instead of relying on the document to close it.
      // It is declared first so that it is released after the document.
      using var stream = file.OpenReadStream();
      // Open the document in read-only mode
      using WordprocessingDocument wordDoc = WordprocessingDocument.Open(stream, false);
      Body? body = wordDoc.MainDocumentPart?.Document?.Body;
      if(body is null)
      {
         logger.Warn("Failed reading the document");
         return;
      }
      // Get all elements as XML from body
      foreach(OpenXmlElement elm in body.Elements())
      {
         logger.Info($"Type: {elm.GetType()} XML: {elm.InnerXml}");
      }
   }
}
Enter fullscreen mode Exit fullscreen mode

Result

Type: DocumentFormat.OpenXml.Wordprocessing.Paragraph XML: <w:pPr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:pStyle w:val="a6" /><w:jc w:val="left" /></w:pPr><w:r xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:rPr><w:noProof /></w:rPr><mc:AlternateContent xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"><mc:Choice Requires="wps"><w:drawing><wp:anchor distT="0" distB="0" distL="114300" distR="114300" simplePos="0" relativeHeight="251659264" behindDoc="0" locked="0" layoutInCell="1" allowOverlap="1" wp14:editId="2621265D" wp14:anchorId="3EB8B806" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"><wp:simplePos x="0" y="0" /><wp:positionH relativeFrom="column"><wp:posOffset>485140</wp:posOffset></wp:positionH><wp:positionV relativeFrom="paragraph"><wp:posOffset>329565</wp:posOffset></wp:positionV><wp:extent cx="1495425" cy="604837" /><wp:effectExtent l="0" t="0" r="28575" b="24130" /><wp:wrapNone />
...
Enter fullscreen mode Exit fullscreen mode

I can also get the inner text and style information for each element type.

Table

DocFileReader.cs

...
   /// <summary>
   /// Walks the child elements of the document body and logs the properties
   /// of every table found among them.
   /// </summary>
   /// <param name="file">The uploaded file received from the client.</param>
   public void Read(IFormFile file)
   {
...
      // Only elements returned by body.Elements() are inspected here
      foreach(OpenXmlElement elm in body.Elements())
      {
         // Type pattern: skips everything that is not a Table
         if(elm is Table table)
         {
            logger.Info("Table found:");
            ReadTableProps(table);
         }
      }
   }
   /// <summary>
   /// Logs the width and left/top borders of a table, then for each row its height
   /// and, for each cell, the text, width, right/bottom borders and shading.
   /// </summary>
   /// <param name="table">The table element to inspect.</param>
   private void ReadTableProps(Table table)
   {
      // Table-level settings (width, borders) live under the table properties element
      if(table.GetFirstChild<TableProperties>() is { } tblProps)
      {
         logger.Info($"Table Width: {tblProps.GetFirstChild<TableWidth>()?.Width}");

         if(tblProps.GetFirstChild<TableBorders>() is { } tblBorders)
         {
            var left = tblBorders.LeftBorder;
            logger.Info($"Table Border Left Val: {left?.Val} Color: {left?.Color} Size: {left?.Size}");
            var top = tblBorders.TopBorder;
            logger.Info($"Table Border Top Val: {top?.Val} Color: {top?.Color} Size: {top?.Size}");
         }
      }

      foreach(TableRow tableRow in table.Elements<TableRow>())
      {
         // Row height is only reported when the row carries a properties element
         if(tableRow.GetFirstChild<TableRowProperties>() is { } rowProps)
         {
            logger.Info($"Row Height: {rowProps.GetFirstChild<TableRowHeight>()?.Val}");
         }

         foreach(TableCell tableCell in tableRow.Elements<TableCell>())
         {
            logger.Info($"CELL Text: {tableCell.InnerText}");

            // Nothing more to report for cells without a properties element
            if(tableCell.GetFirstChild<TableCellProperties>() is not { } cellProps)
            {
               continue;
            }

            logger.Info($"Cell Width: {cellProps.GetFirstChild<TableCellWidth>()?.Width}");

            if(cellProps.GetFirstChild<TableCellBorders>() is { } cellBorders)
            {
               var right = cellBorders.RightBorder;
               logger.Info($"Cell Border Right Val: {right?.Val} Color: {right?.Color} Size: {right?.Size}");
               var bottom = cellBorders.BottomBorder;
               logger.Info($"Cell Border Bottom Val: {bottom?.Val} Color: {bottom?.Color} Size: {bottom?.Size}");
            }

            // Background fill and pattern color of the cell
            if(cellProps.GetFirstChild<Shading>() is { } cellShading)
            {
               logger.Info($"Cell BackgroundColor: {cellShading.Fill?.Value} Color:{cellShading.Color}");
            }
         }
         // Separator between rows
         logger.Info("-----------");
      }
      logger.Info("\n");
   }
}
Enter fullscreen mode Exit fullscreen mode

Top comments (0)